Iliyan Angelov
2025-12-01 06:50:10 +02:00
parent 91f51bc6fe
commit 62c1fe5951
4682 changed files with 544807 additions and 31208 deletions

View File

@@ -0,0 +1,116 @@
# Natural Language Toolkit: Language Model Unit Tests
#
# Copyright (C) 2001-2025 NLTK Project
# Author: Ilia Kurenkov <ilia.kurenkov@gmail.com>
# URL: <https://www.nltk.org/>
# For license information, see LICENSE.TXT
import unittest
import pytest
from nltk import FreqDist
from nltk.lm import NgramCounter
from nltk.util import everygrams
class TestNgramCounter:
"""Tests for NgramCounter that only involve lookup, no modification."""
@classmethod
def setup_class(self):
text = [list("abcd"), list("egdbe")]
self.trigram_counter = NgramCounter(
everygrams(sent, max_len=3) for sent in text
)
self.bigram_counter = NgramCounter(everygrams(sent, max_len=2) for sent in text)
self.case = unittest.TestCase()
def test_N(self):
assert self.bigram_counter.N() == 16
assert self.trigram_counter.N() == 21
def test_counter_len_changes_with_lookup(self):
assert len(self.bigram_counter) == 2
self.bigram_counter[50]
assert len(self.bigram_counter) == 3
def test_ngram_order_access_unigrams(self):
assert self.bigram_counter[1] == self.bigram_counter.unigrams
def test_ngram_conditional_freqdist(self):
case = unittest.TestCase()
expected_trigram_contexts = [
("a", "b"),
("b", "c"),
("e", "g"),
("g", "d"),
("d", "b"),
]
expected_bigram_contexts = [("a",), ("b",), ("d",), ("e",), ("c",), ("g",)]
bigrams = self.trigram_counter[2]
trigrams = self.trigram_counter[3]
self.case.assertCountEqual(expected_bigram_contexts, bigrams.conditions())
self.case.assertCountEqual(expected_trigram_contexts, trigrams.conditions())
def test_bigram_counts_seen_ngrams(self):
assert self.bigram_counter[["a"]]["b"] == 1
assert self.bigram_counter[["b"]]["c"] == 1
def test_bigram_counts_unseen_ngrams(self):
assert self.bigram_counter[["b"]]["z"] == 0
def test_unigram_counts_seen_words(self):
assert self.bigram_counter["b"] == 2
def test_unigram_counts_completely_unseen_words(self):
assert self.bigram_counter["z"] == 0
class TestNgramCounterTraining:
@classmethod
def setup_class(self):
self.counter = NgramCounter()
self.case = unittest.TestCase()
@pytest.mark.parametrize("case", ["", [], None])
def test_empty_inputs(self, case):
test = NgramCounter(case)
assert 2 not in test
assert test[1] == FreqDist()
def test_train_on_unigrams(self):
words = list("abcd")
counter = NgramCounter([[(w,) for w in words]])
assert not counter[3]
assert not counter[2]
self.case.assertCountEqual(words, counter[1].keys())
def test_train_on_illegal_sentences(self):
str_sent = ["Check", "this", "out", "!"]
list_sent = [["Check", "this"], ["this", "out"], ["out", "!"]]
with pytest.raises(TypeError):
NgramCounter([str_sent])
with pytest.raises(TypeError):
NgramCounter([list_sent])
def test_train_on_bigrams(self):
bigram_sent = [("a", "b"), ("c", "d")]
counter = NgramCounter([bigram_sent])
assert not bool(counter[3])
def test_train_on_mix(self):
mixed_sent = [("a", "b"), ("c", "d"), ("e", "f", "g"), ("h",)]
counter = NgramCounter([mixed_sent])
unigrams = ["h"]
bigram_contexts = [("a",), ("c",)]
trigram_contexts = [("e", "f")]
self.case.assertCountEqual(unigrams, counter[1].keys())
self.case.assertCountEqual(bigram_contexts, counter[2].keys())
self.case.assertCountEqual(trigram_contexts, counter[3].keys())
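A quick standalone cross-check of where the totals in test_N come from (an illustrative sketch reusing the everygrams import at the top of this file): a sentence of length k contributes k unigrams, k-1 bigrams and k-2 trigrams, so "abcd" and "egdbe" yield 7 + 9 = 16 everygram tokens at max_len=2 and 9 + 12 = 21 at max_len=3.

sents = [list("abcd"), list("egdbe")]
assert sum(len(list(everygrams(s, max_len=2))) for s in sents) == 16
assert sum(len(list(everygrams(s, max_len=3))) for s in sents) == 21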

View File

@@ -0,0 +1,611 @@
# Natural Language Toolkit: Language Model Unit Tests
#
# Copyright (C) 2001-2025 NLTK Project
# Author: Ilia Kurenkov <ilia.kurenkov@gmail.com>
# URL: <https://www.nltk.org/>
# For license information, see LICENSE.TXT
import math
from math import fsum as sum
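# (fsum is aliased to sum so the probability sums in the distribution checks below are numerically stable)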
from operator import itemgetter
import pytest
from nltk.lm import (
MLE,
AbsoluteDiscountingInterpolated,
KneserNeyInterpolated,
Laplace,
Lidstone,
StupidBackoff,
Vocabulary,
WittenBellInterpolated,
)
from nltk.lm.preprocessing import padded_everygrams
@pytest.fixture(scope="session")
def vocabulary():
return Vocabulary(["a", "b", "c", "d", "z", "<s>", "</s>"], unk_cutoff=1)
@pytest.fixture(scope="session")
def training_data():
return [["a", "b", "c", "d"], ["e", "g", "a", "d", "b", "e"]]
@pytest.fixture(scope="session")
def bigram_training_data(training_data):
return [list(padded_everygrams(2, sent)) for sent in training_data]
@pytest.fixture(scope="session")
def trigram_training_data(training_data):
return [list(padded_everygrams(3, sent)) for sent in training_data]
@pytest.fixture
def mle_bigram_model(vocabulary, bigram_training_data):
model = MLE(2, vocabulary=vocabulary)
model.fit(bigram_training_data)
return model
@pytest.mark.parametrize(
"word, context, expected_score",
[
("d", ["c"], 1),
# Unseen ngrams should yield 0
("d", ["e"], 0),
# Unigrams should also be 0
("z", None, 0),
# N unigrams = 14
# count('a') = 2
("a", None, 2.0 / 14),
# count('y') = 3
("y", None, 3.0 / 14),
],
)
def test_mle_bigram_scores(mle_bigram_model, word, context, expected_score):
assert pytest.approx(mle_bigram_model.score(word, context), 1e-4) == expected_score
def test_mle_bigram_logscore_for_zero_score(mle_bigram_model):
assert math.isinf(mle_bigram_model.logscore("d", ["e"]))
def test_mle_bigram_entropy_perplexity_seen(mle_bigram_model):
# ngrams seen during training
trained = [
("<s>", "a"),
("a", "b"),
("b", "<UNK>"),
("<UNK>", "a"),
("a", "d"),
("d", "</s>"),
]
# Ngram = Log score
# <s>, a = -1
# a, b = -1
# b, UNK = -1
# UNK, a = -1.585
# a, d = -1
# d, </s> = -1
# TOTAL logscores = -6.585
# - AVG logscores = 1.0975
H = 1.0975
perplexity = 2.1398
assert pytest.approx(mle_bigram_model.entropy(trained), 1e-4) == H
assert pytest.approx(mle_bigram_model.perplexity(trained), 1e-4) == perplexity
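# A standalone cross-check of the relationship used above (illustrative sketch):
# entropy is the negated average log2 score over the ngrams and perplexity is
# 2 ** entropy, so the six seen-bigram logscores listed in the comment give
# H = 6.585 / 6 = 1.0975 and perplexity = 2 ** 1.0975 ~= 2.1398.
_seen_logscores = [-1, -1, -1, -1.585, -1, -1]
assert math.isclose(-sum(_seen_logscores) / len(_seen_logscores), 1.0975)
assert math.isclose(2**1.0975, 2.1398, rel_tol=1e-4)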
def test_mle_bigram_entropy_perplexity_unseen(mle_bigram_model):
# In MLE, even one unseen ngram should make entropy and perplexity infinite
untrained = [("<s>", "a"), ("a", "c"), ("c", "d"), ("d", "</s>")]
assert math.isinf(mle_bigram_model.entropy(untrained))
assert math.isinf(mle_bigram_model.perplexity(untrained))
def test_mle_bigram_entropy_perplexity_unigrams(mle_bigram_model):
# word = score, log score
# <s> = 0.1429, -2.8074
# a = 0.1429, -2.8074
# c = 0.0714, -3.8073
# UNK = 0.2143, -2.2224
# d = 0.1429, -2.8074
# c = 0.0714, -3.8073
# </s> = 0.1429, -2.8074
# TOTAL logscores = -21.6243
# - AVG logscores = 3.0095
H = 3.0095
perplexity = 8.0529
text = [("<s>",), ("a",), ("c",), ("-",), ("d",), ("c",), ("</s>",)]
assert pytest.approx(mle_bigram_model.entropy(text), 1e-4) == H
assert pytest.approx(mle_bigram_model.perplexity(text), 1e-4) == perplexity
@pytest.fixture
def mle_trigram_model(trigram_training_data, vocabulary):
model = MLE(order=3, vocabulary=vocabulary)
model.fit(trigram_training_data)
return model
@pytest.mark.parametrize(
"word, context, expected_score",
[
# count(d | b, c) = 1
# count(b, c) = 1
("d", ("b", "c"), 1),
# count(d | c) = 1
# count(c) = 1
("d", ["c"], 1),
# total number of tokens is 18, of which "a" occurred 2 times
("a", None, 2.0 / 18),
# in vocabulary but unseen
("z", None, 0),
# out of vocabulary should use "UNK" score
("y", None, 3.0 / 18),
],
)
def test_mle_trigram_scores(mle_trigram_model, word, context, expected_score):
assert pytest.approx(mle_trigram_model.score(word, context), 1e-4) == expected_score
@pytest.fixture
def lidstone_bigram_model(bigram_training_data, vocabulary):
model = Lidstone(0.1, order=2, vocabulary=vocabulary)
model.fit(bigram_training_data)
return model
@pytest.mark.parametrize(
"word, context, expected_score",
[
# count(d | c) = 1
# *count(d | c) = 1.1
# Count(w | c for w in vocab) = 1
# *Count(w | c for w in vocab) = 1.8
("d", ["c"], 1.1 / 1.8),
# Total unigrams: 14
# Vocab size: 8
# Denominator: 14 + 0.8 = 14.8
# count("a") = 2
# *count("a") = 2.1
("a", None, 2.1 / 14.8),
# in vocabulary but unseen
# count("z") = 0
# *count("z") = 0.1
("z", None, 0.1 / 14.8),
# out of vocabulary should use "UNK" score
# count("<UNK>") = 3
# *count("<UNK>") = 3.1
("y", None, 3.1 / 14.8),
],
)
def test_lidstone_bigram_score(lidstone_bigram_model, word, context, expected_score):
assert (
pytest.approx(lidstone_bigram_model.score(word, context), 1e-4)
== expected_score
)
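# The Lidstone expectations above all follow one closed form (illustrative
# sketch, not the model's internals): score = (count + gamma) / (total + gamma * V)
# with gamma = 0.1 and V = 8 (the 7 vocabulary words plus <UNK>).
def _lidstone_score(count, total, gamma=0.1, vocab_size=8):
    return (count + gamma) / (total + gamma * vocab_size)

assert math.isclose(_lidstone_score(1, 1), 1.1 / 1.8)  # "d" after "c"
assert math.isclose(_lidstone_score(2, 14), 2.1 / 14.8)  # unigram "a"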
def test_lidstone_entropy_perplexity(lidstone_bigram_model):
text = [
("<s>", "a"),
("a", "c"),
("c", "<UNK>"),
("<UNK>", "d"),
("d", "c"),
("c", "</s>"),
]
# Unlike MLE this should be able to handle completely novel ngrams
# Ngram = score, log score
# <s>, a = 0.3929, -1.3479
# a, c = 0.0357, -4.8074
# c, UNK = 0.0(5), -4.1699
# UNK, d = 0.0263, -5.2479
# d, c = 0.0357, -4.8074
# c, </s> = 0.0(5), -4.1699
# TOTAL logscore: -24.5504
# - AVG logscore: 4.0917
H = 4.0917
perplexity = 17.0504
assert pytest.approx(lidstone_bigram_model.entropy(text), 1e-4) == H
assert pytest.approx(lidstone_bigram_model.perplexity(text), 1e-4) == perplexity
@pytest.fixture
def lidstone_trigram_model(trigram_training_data, vocabulary):
model = Lidstone(0.1, order=3, vocabulary=vocabulary)
model.fit(trigram_training_data)
return model
@pytest.mark.parametrize(
"word, context, expected_score",
[
# Logic behind this is the same as for bigram model
("d", ["c"], 1.1 / 1.8),
# if we choose a word that hasn't appeared after (b, c)
("e", ["c"], 0.1 / 1.8),
# Trigram score now
("d", ["b", "c"], 1.1 / 1.8),
("e", ["b", "c"], 0.1 / 1.8),
],
)
def test_lidstone_trigram_score(lidstone_trigram_model, word, context, expected_score):
assert (
pytest.approx(lidstone_trigram_model.score(word, context), 1e-4)
== expected_score
)
@pytest.fixture
def laplace_bigram_model(bigram_training_data, vocabulary):
model = Laplace(2, vocabulary=vocabulary)
model.fit(bigram_training_data)
return model
@pytest.mark.parametrize(
"word, context, expected_score",
[
# basic sanity-check:
# count(d | c) = 1
# *count(d | c) = 2
# Count(w | c for w in vocab) = 1
# *Count(w | c for w in vocab) = 9
("d", ["c"], 2.0 / 9),
# Total unigrams: 14
# Vocab size: 8
# Denominator: 14 + 8 = 22
# count("a") = 2
# *count("a") = 3
("a", None, 3.0 / 22),
# in vocabulary but unseen
# count("z") = 0
# *count("z") = 1
("z", None, 1.0 / 22),
# out of vocabulary should use "UNK" score
# count("<UNK>") = 3
# *count("<UNK>") = 4
("y", None, 4.0 / 22),
],
)
def test_laplace_bigram_score(laplace_bigram_model, word, context, expected_score):
assert (
pytest.approx(laplace_bigram_model.score(word, context), 1e-4) == expected_score
)
def test_laplace_bigram_entropy_perplexity(laplace_bigram_model):
text = [
("<s>", "a"),
("a", "c"),
("c", "<UNK>"),
("<UNK>", "d"),
("d", "c"),
("c", "</s>"),
]
# Unlike MLE this should be able to handle completely novel ngrams
# Ngram = score, log score
# <s>, a = 0.2, -2.3219
# a, c = 0.1, -3.3219
# c, UNK = 0.(1), -3.1699
# UNK, d = 0.(09), -3.4594
# d, c = 0.1, -3.3219
# c, </s> = 0.(1), -3.1699
# Total logscores: -18.7651
# - AVG logscores: 3.1275
H = 3.1275
perplexity = 8.7393
assert pytest.approx(laplace_bigram_model.entropy(text), 1e-4) == H
assert pytest.approx(laplace_bigram_model.perplexity(text), 1e-4) == perplexity
def test_laplace_gamma(laplace_bigram_model):
assert laplace_bigram_model.gamma == 1
@pytest.fixture
def wittenbell_trigram_model(trigram_training_data, vocabulary):
model = WittenBellInterpolated(3, vocabulary=vocabulary)
model.fit(trigram_training_data)
return model
@pytest.mark.parametrize(
"word, context, expected_score",
[
# For unigram scores by default revert to regular MLE
# Total unigrams: 18
# Vocab Size = 7
# count('c'): 1
("c", None, 1.0 / 18),
# in vocabulary but unseen
# count("z") = 0
("z", None, 0 / 18),
# out of vocabulary should use "UNK" score
# count("<UNK>") = 3
("y", None, 3.0 / 18),
# 2 words follow b and b occurred a total of 2 times
# gamma(['b']) = 2 / (2 + 2) = 0.5
# mle.score('c', ['b']) = 0.5
# mle('c') = 1 / 18 = 0.055
# (1 - gamma) * mle + gamma * mle('c') = 0.25 + 0.0278 ~= 0.2778
("c", ["b"], (1 - 0.5) * 0.5 + 0.5 * 1 / 18),
# building on that, let's try 'a b c' as the trigram
# 1 word follows 'a b' and 'a b' occurred 1 time
# gamma(['a', 'b']) = 1 / (1 + 1) = 0.5
# mle("c", ["a", "b"]) = 1
("c", ["a", "b"], (1 - 0.5) + 0.5 * ((1 - 0.5) * 0.5 + 0.5 * 1 / 18)),
# P(c|zb)
# The ngram 'zbc' was not seen, so we use P(c|b). See issue #2332.
("c", ["z", "b"], ((1 - 0.5) * 0.5 + 0.5 * 1 / 18)),
],
)
def test_wittenbell_trigram_score(
wittenbell_trigram_model, word, context, expected_score
):
assert (
pytest.approx(wittenbell_trigram_model.score(word, context), 1e-4)
== expected_score
)
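# A compact restatement of the interpolation used in the expectations above
# (illustrative sketch): P_WB(w | ctx) = (1 - gamma) * MLE(w | ctx)
# + gamma * P_WB(w | shorter ctx), where gamma = T / (T + N), with T the number
# of distinct words seen after ctx and N the total count of ctx.
# For ctx = ['b']: T = 2 and N = 2, so gamma = 0.5:
gamma_b = 2 / (2 + 2)
p_c_given_b = (1 - gamma_b) * 0.5 + gamma_b * (1 / 18)
assert round(p_c_given_b, 4) == 0.2778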
###############################################################################
# Notation Explained #
###############################################################################
# For all subsequent calculations we use the following notation:
# 1. '*': Placeholder for any word/character. E.g. '*b' stands for
# all bigrams that end in 'b'. '*b*' stands for all trigrams that
# contain 'b' in the middle.
# 1. count(ngram): Count all instances (tokens) of an ngram.
# 1. unique(ngram): Count unique instances (types) of an ngram.
@pytest.fixture
def kneserney_trigram_model(trigram_training_data, vocabulary):
model = KneserNeyInterpolated(order=3, discount=0.75, vocabulary=vocabulary)
model.fit(trigram_training_data)
return model
@pytest.mark.parametrize(
"word, context, expected_score",
[
# P(c) = count('*c') / unique('**')
# = 1 / 14
("c", None, 1.0 / 14),
# P(z) = count('*z') / unique('**')
# = 0 / 14
# 'z' is in the vocabulary, but it was not seen during training.
("z", None, 0.0 / 14),
# P(y)
# Out of vocabulary should use "UNK" score.
# P(y) = P(UNK) = count('*UNK') / unique('**')
("y", None, 3 / 14),
# We start with P(c|b)
# P(c|b) = alpha('bc') + gamma('b') * P(c)
# alpha('bc') = max(unique('*bc') - discount, 0) / unique('*b*')
# = max(1 - 0.75, 0) / 2
# = 0.125
# gamma('b') = discount * unique('b*') / unique('*b*')
# = (0.75 * 2) / 2
# = 0.75
("c", ["b"], (0.125 + 0.75 * (1 / 14))),
# Building on that, let's try P(c|ab).
# P(c|ab) = alpha('abc') + gamma('ab') * P(c|b)
# alpha('abc') = max(count('abc') - discount, 0) / count('ab*')
# = max(1 - 0.75, 0) / 1
# = 0.25
# gamma('ab') = (discount * unique('ab*')) / count('ab*')
# = 0.75 * 1 / 1
("c", ["a", "b"], 0.25 + 0.75 * (0.125 + 0.75 * (1 / 14))),
# P(c|zb)
# The ngram 'zbc' was not seen, so we use P(c|b). See issue #2332.
("c", ["z", "b"], (0.125 + 0.75 * (1 / 14))),
],
)
def test_kneserney_trigram_score(
kneserney_trigram_model, word, context, expected_score
):
assert (
pytest.approx(kneserney_trigram_model.score(word, context), 1e-4)
== expected_score
)
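# A compact restatement of the Kneser-Ney pieces used above (illustrative
# sketch, discount d = 0.75): the highest order mixes a discounted count with
# a backoff weight, and the unigram base case uses continuation counts
# (unique('*c') / unique('**') = 1 / 14) rather than raw frequencies.
kn_alpha_bc = max(1 - 0.75, 0) / 2  # unique('*bc') = 1, unique('*b*') = 2
kn_gamma_b = 0.75 * 2 / 2  # d * unique('b*') / unique('*b*')
assert round(kn_alpha_bc + kn_gamma_b * (1 / 14), 4) == 0.1786  # P(c | b) above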
@pytest.fixture
def absolute_discounting_trigram_model(trigram_training_data, vocabulary):
model = AbsoluteDiscountingInterpolated(order=3, vocabulary=vocabulary)
model.fit(trigram_training_data)
return model
@pytest.mark.parametrize(
"word, context, expected_score",
[
# For unigram scores revert to regular MLE
# P(c) = count('c') / count('**')
("c", None, 1.0 / 18),
# in vocabulary but unseen
# count('z') = 0
("z", None, 0.0 / 18),
# out of vocabulary should use "UNK" score
# count('<UNK>') = 3
("y", None, 3 / 18),
# P(c|b) = alpha('bc') + gamma('b') * P(c)
# alpha('bc') = max(count('bc') - discount, 0) / count('b*')
# = max(1 - 0.75, 0) / 2
# = 0.125
# gamma('b') = discount * unique('b*') / count('b*')
# = (0.75 * 2) / 2
# = 0.75
("c", ["b"], (0.125 + 0.75 * (2 / 2) * (1 / 18))),
# Building on that, let's try P(c|ab).
# P(c|ab) = alpha('abc') + gamma('ab') * P(c|b)
# alpha('abc') = max(count('abc') - discount, 0) / count('ab*')
# = max(1 - 0.75, 0) / 1
# = 0.25
# gamma('ab') = (discount * unique('ab*')) / count('ab*')
# = 0.75 * 1 / 1
("c", ["a", "b"], 0.25 + 0.75 * (0.125 + 0.75 * (2 / 2) * (1 / 18))),
# P(c|zb)
# The ngram 'zbc' was not seen, so we use P(c|b). See issue #2332.
("c", ["z", "b"], (0.125 + 0.75 * (2 / 2) * (1 / 18))),
],
)
def test_absolute_discounting_trigram_score(
absolute_discounting_trigram_model, word, context, expected_score
):
assert (
pytest.approx(absolute_discounting_trigram_model.score(word, context), 1e-4)
== expected_score
)
@pytest.fixture
def stupid_backoff_trigram_model(trigram_training_data, vocabulary):
model = StupidBackoff(order=3, vocabulary=vocabulary)
model.fit(trigram_training_data)
return model
@pytest.mark.parametrize(
"word, context, expected_score",
[
# For unigram scores revert to regular MLE
# Total unigrams: 18
("c", None, 1.0 / 18),
# in vocabulary but unseen
# bigrams ending with z = 0
("z", None, 0.0 / 18),
# out of vocabulary should use "UNK" score
# count('<UNK>'): 3
("y", None, 3 / 18),
# c follows 1 time out of 2 after b
("c", ["b"], 1 / 2),
# c always follows ab
("c", ["a", "b"], 1 / 1),
# The ngram 'z b c' was not seen, so we backoff to
# the score of the ngram 'b c' * smoothing factor
("c", ["z", "b"], (0.4 * (1 / 2))),
],
)
def test_stupid_backoff_trigram_score(
stupid_backoff_trigram_model, word, context, expected_score
):
assert (
pytest.approx(stupid_backoff_trigram_model.score(word, context), 1e-4)
== expected_score
)
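# Sketch of the recursion behind the last expectation above (illustrative):
# Stupid Backoff returns a relative frequency when the full ngram was seen and
# otherwise alpha * the score of the shortened context, with alpha = 0.4 here,
# so S('c' | 'z', 'b') = 0.4 * S('c' | 'b') = 0.4 * (1 / 2).
assert 0.4 * (1 / 2) == 0.2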
###############################################################################
# Probability Distributions Should Sum up to Unity #
###############################################################################
@pytest.fixture(scope="session")
def kneserney_bigram_model(bigram_training_data, vocabulary):
model = KneserNeyInterpolated(order=2, vocabulary=vocabulary)
model.fit(bigram_training_data)
return model
@pytest.mark.parametrize(
"model_fixture",
[
"mle_bigram_model",
"mle_trigram_model",
"lidstone_bigram_model",
"laplace_bigram_model",
"wittenbell_trigram_model",
"absolute_discounting_trigram_model",
"kneserney_bigram_model",
pytest.param(
"stupid_backoff_trigram_model",
marks=pytest.mark.xfail(
reason="Stupid Backoff is not a valid distribution"
),
),
],
)
@pytest.mark.parametrize(
"context",
[("a",), ("c",), ("<s>",), ("b",), ("<UNK>",), ("d",), ("e",), ("r",), ("w",)],
ids=itemgetter(0),
)
def test_sums_to_1(model_fixture, context, request):
model = request.getfixturevalue(model_fixture)
scores_for_context = sum(model.score(w, context) for w in model.vocab)
assert pytest.approx(scores_for_context, 1e-7) == 1.0
###############################################################################
# Generating Text #
###############################################################################
def test_generate_one_no_context(mle_trigram_model):
assert mle_trigram_model.generate(random_seed=3) == "<UNK>"
def test_generate_one_from_limiting_context(mle_trigram_model):
# We don't need random_seed for contexts with only one continuation
assert mle_trigram_model.generate(text_seed=["c"]) == "d"
assert mle_trigram_model.generate(text_seed=["b", "c"]) == "d"
assert mle_trigram_model.generate(text_seed=["a", "c"]) == "d"
def test_generate_one_from_varied_context(mle_trigram_model):
# When context doesn't limit our options enough, seed the random choice
assert mle_trigram_model.generate(text_seed=("a", "<s>"), random_seed=2) == "a"
def test_generate_cycle(mle_trigram_model):
# Add a cycle to the model: bd -> b, db -> d
more_training_text = [padded_everygrams(mle_trigram_model.order, list("bdbdbd"))]
mle_trigram_model.fit(more_training_text)
# Test that we can escape the cycle
assert mle_trigram_model.generate(7, text_seed=("b", "d"), random_seed=5) == [
"b",
"d",
"b",
"d",
"b",
"d",
"</s>",
]
def test_generate_with_text_seed(mle_trigram_model):
assert mle_trigram_model.generate(5, text_seed=("<s>", "e"), random_seed=3) == [
"<UNK>",
"a",
"d",
"b",
"<UNK>",
]
def test_generate_oov_text_seed(mle_trigram_model):
assert mle_trigram_model.generate(
text_seed=("aliens",), random_seed=3
) == mle_trigram_model.generate(text_seed=("<UNK>",), random_seed=3)
def test_generate_None_text_seed(mle_trigram_model):
# should crash with type error when we try to look it up in vocabulary
with pytest.raises(TypeError):
mle_trigram_model.generate(text_seed=(None,))
# This will work
assert mle_trigram_model.generate(
text_seed=None, random_seed=3
) == mle_trigram_model.generate(random_seed=3)

View File

@@ -0,0 +1,30 @@
# Natural Language Toolkit: Language Model Unit Tests
#
# Copyright (C) 2001-2025 NLTK Project
# Author: Ilia Kurenkov <ilia.kurenkov@gmail.com>
# URL: <https://www.nltk.org/>
# For license information, see LICENSE.TXT
import unittest
from nltk.lm.preprocessing import padded_everygram_pipeline
class TestPreprocessing(unittest.TestCase):
def test_padded_everygram_pipeline(self):
expected_train = [
[
("<s>",),
("<s>", "a"),
("a",),
("a", "b"),
("b",),
("b", "c"),
("c",),
("c", "</s>"),
("</s>",),
]
]
expected_vocab = ["<s>", "a", "b", "c", "</s>"]
train_data, vocab_data = padded_everygram_pipeline(2, [["a", "b", "c"]])
self.assertEqual([list(sent) for sent in train_data], expected_train)
self.assertEqual(list(vocab_data), expected_vocab)
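The pipeline under test is essentially padding each sentence on both sides and then taking its everygrams; a minimal sketch with the underlying helpers (illustrative only):

from nltk.lm.preprocessing import pad_both_ends
from nltk.util import everygrams

padded = list(pad_both_ends(["a", "b", "c"], n=2))  # ['<s>', 'a', 'b', 'c', '</s>']
assert list(everygrams(padded, max_len=2))[:2] == [("<s>",), ("<s>", "a")]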

View File

@@ -0,0 +1,156 @@
# Natural Language Toolkit: Language Model Unit Tests
#
# Copyright (C) 2001-2025 NLTK Project
# Author: Ilia Kurenkov <ilia.kurenkov@gmail.com>
# URL: <https://www.nltk.org/>
# For license information, see LICENSE.TXT
import unittest
from collections import Counter
from timeit import timeit
from nltk.lm import Vocabulary
class NgramModelVocabularyTests(unittest.TestCase):
"""tests Vocabulary Class"""
@classmethod
def setUpClass(cls):
cls.vocab = Vocabulary(
["z", "a", "b", "c", "f", "d", "e", "g", "a", "d", "b", "e", "w"],
unk_cutoff=2,
)
def test_truthiness(self):
self.assertTrue(self.vocab)
def test_cutoff_value_set_correctly(self):
self.assertEqual(self.vocab.cutoff, 2)
def test_unable_to_change_cutoff(self):
with self.assertRaises(AttributeError):
self.vocab.cutoff = 3
def test_cutoff_setter_checks_value(self):
with self.assertRaises(ValueError) as exc_info:
Vocabulary("abc", unk_cutoff=0)
expected_error_msg = "Cutoff value cannot be less than 1. Got: 0"
self.assertEqual(expected_error_msg, str(exc_info.exception))
def test_counts_set_correctly(self):
self.assertEqual(self.vocab.counts["a"], 2)
self.assertEqual(self.vocab.counts["b"], 2)
self.assertEqual(self.vocab.counts["c"], 1)
def test_membership_check_respects_cutoff(self):
# a was seen 2 times, so it should be considered part of the vocabulary
self.assertTrue("a" in self.vocab)
# "c" was seen once, it shouldn't be considered part of the vocab
self.assertFalse("c" in self.vocab)
# "z" was never seen at all, also shouldn't be considered in the vocab
self.assertFalse("z" in self.vocab)
def test_vocab_len_respects_cutoff(self):
# Vocab size is the number of unique tokens that occur at least as often
# as the cutoff value, plus 1 to account for unknown words.
self.assertEqual(5, len(self.vocab))
def test_vocab_iter_respects_cutoff(self):
vocab_counts = ["a", "b", "c", "d", "e", "f", "g", "w", "z"]
vocab_items = ["a", "b", "d", "e", "<UNK>"]
self.assertCountEqual(vocab_counts, list(self.vocab.counts.keys()))
self.assertCountEqual(vocab_items, list(self.vocab))
def test_update_empty_vocab(self):
empty = Vocabulary(unk_cutoff=2)
self.assertEqual(len(empty), 0)
self.assertFalse(empty)
self.assertIn(empty.unk_label, empty)
empty.update(list("abcde"))
self.assertIn(empty.unk_label, empty)
def test_lookup(self):
self.assertEqual(self.vocab.lookup("a"), "a")
self.assertEqual(self.vocab.lookup("c"), "<UNK>")
def test_lookup_iterables(self):
self.assertEqual(self.vocab.lookup(["a", "b"]), ("a", "b"))
self.assertEqual(self.vocab.lookup(("a", "b")), ("a", "b"))
self.assertEqual(self.vocab.lookup(("a", "c")), ("a", "<UNK>"))
self.assertEqual(
self.vocab.lookup(map(str, range(3))), ("<UNK>", "<UNK>", "<UNK>")
)
def test_lookup_empty_iterables(self):
self.assertEqual(self.vocab.lookup(()), ())
self.assertEqual(self.vocab.lookup([]), ())
self.assertEqual(self.vocab.lookup(iter([])), ())
self.assertEqual(self.vocab.lookup(n for n in range(0, 0)), ())
def test_lookup_recursive(self):
self.assertEqual(
self.vocab.lookup([["a", "b"], ["a", "c"]]), (("a", "b"), ("a", "<UNK>"))
)
self.assertEqual(self.vocab.lookup([["a", "b"], "c"]), (("a", "b"), "<UNK>"))
self.assertEqual(self.vocab.lookup([[[[["a", "b"]]]]]), ((((("a", "b"),),),),))
def test_lookup_None(self):
with self.assertRaises(TypeError):
self.vocab.lookup(None)
with self.assertRaises(TypeError):
list(self.vocab.lookup([None, None]))
def test_lookup_int(self):
with self.assertRaises(TypeError):
self.vocab.lookup(1)
with self.assertRaises(TypeError):
list(self.vocab.lookup([1, 2]))
def test_lookup_empty_str(self):
self.assertEqual(self.vocab.lookup(""), "<UNK>")
def test_equality(self):
v1 = Vocabulary(["a", "b", "c"], unk_cutoff=1)
v2 = Vocabulary(["a", "b", "c"], unk_cutoff=1)
v3 = Vocabulary(["a", "b", "c"], unk_cutoff=1, unk_label="blah")
v4 = Vocabulary(["a", "b"], unk_cutoff=1)
self.assertEqual(v1, v2)
self.assertNotEqual(v1, v3)
self.assertNotEqual(v1, v4)
def test_str(self):
self.assertEqual(
str(self.vocab), "<Vocabulary with cutoff=2 unk_label='<UNK>' and 5 items>"
)
def test_creation_with_counter(self):
self.assertEqual(
self.vocab,
Vocabulary(
Counter(
["z", "a", "b", "c", "f", "d", "e", "g", "a", "d", "b", "e", "w"]
),
unk_cutoff=2,
),
)
@unittest.skip(
reason="Test is known to be flaky as it compares (runtime) performance."
)
def test_len_is_constant(self):
# Given an obviously small and an obviously large vocabulary.
small_vocab = Vocabulary("abcde")
from nltk.corpus.europarl_raw import english
large_vocab = Vocabulary(english.words())
# If we time calling `len` on them.
small_vocab_len_time = timeit("len(small_vocab)", globals=locals())
large_vocab_len_time = timeit("len(large_vocab)", globals=locals())
# The timing should be the same order of magnitude.
self.assertAlmostEqual(small_vocab_len_time, large_vocab_len_time, places=1)
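A minimal sketch of the cutoff behaviour exercised above (illustrative, reusing the Vocabulary import at the top of this file): with unk_cutoff=2 only tokens seen at least twice are members, and everything else maps to the unknown label.

v = Vocabulary(["z", "a", "b", "c", "f", "d", "e", "g", "a", "d", "b", "e", "w"], unk_cutoff=2)
assert sorted(w for w in v if w != "<UNK>") == ["a", "b", "d", "e"]  # seen twice
assert len(v) == 4 + 1  # the members above plus <UNK>
assert v.lookup("c") == "<UNK>"  # seen only once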

View File

@@ -0,0 +1,49 @@
"""
Test Aline algorithm for aligning phonetic sequences
"""
from nltk.metrics import aline
def test_aline():
result = aline.align("θin", "tenwis")
expected = [[("θ", "t"), ("i", "e"), ("n", "n")]]
assert result == expected
result = aline.align("jo", "ʒə")
expected = [[("j", "ʒ"), ("o", "ə")]]
assert result == expected
result = aline.align("pematesiweni", "pematesewen")
expected = [
[
("p", "p"),
("e", "e"),
("m", "m"),
("a", "a"),
("t", "t"),
("e", "e"),
("s", "s"),
("i", "e"),
("w", "w"),
("e", "e"),
("n", "n"),
]
]
assert result == expected
result = aline.align("tuwθ", "dentis")
expected = [[("t", "t"), ("u", "i"), ("w", "-"), ("θ", "s")]]
assert result == expected
def test_aline_delta():
"""
Test aline for computing the difference between two segments
"""
assert aline.delta("p", "q") == 20.0
assert aline.delta("a", "A") == 0.0

View File

@@ -0,0 +1,42 @@
import pytest
from nltk.data import find
from nltk.parse.bllip import BllipParser
from nltk.tree import Tree
@pytest.fixture(scope="module")
def parser():
model_dir = find("models/bllip_wsj_no_aux").path
return BllipParser.from_unified_model_dir(model_dir)
def setup_module():
pytest.importorskip("bllipparser")
class TestBllipParser:
def test_parser_loads_a_valid_tree(self, parser):
parsed = parser.parse("I saw the man with the telescope")
tree = next(parsed)
assert isinstance(tree, Tree)
assert (
tree.pformat()
== """
(S1
(S
(NP (PRP I))
(VP
(VBD saw)
(NP (DT the) (NN man))
(PP (IN with) (NP (DT the) (NN telescope))))))
""".strip()
)
def test_tagged_parse_finds_matching_element(self, parser):
parsed = parser.parse("I saw the man with the telescope")
tagged_tree = next(parser.tagged_parse([("telescope", "NN")]))
assert isinstance(tagged_tree, Tree)
assert tagged_tree.pformat() == "(S1 (NP (NN telescope)))"

View File

@@ -0,0 +1,34 @@
"""
Tests for Brill tagger.
"""
import unittest
from nltk.corpus import treebank
from nltk.tag import UnigramTagger, brill, brill_trainer
from nltk.tbl import demo
class TestBrill(unittest.TestCase):
def test_pos_template(self):
train_sents = treebank.tagged_sents()[:1000]
tagger = UnigramTagger(train_sents)
trainer = brill_trainer.BrillTaggerTrainer(
tagger, [brill.Template(brill.Pos([-1]))]
)
brill_tagger = trainer.train(train_sents)
# Example from https://github.com/nltk/nltk/issues/769
result = brill_tagger.tag("This is a foo bar sentence".split())
expected = [
("This", "DT"),
("is", "VBZ"),
("a", "DT"),
("foo", None),
("bar", "NN"),
("sentence", None),
]
self.assertEqual(result, expected)
@unittest.skip("Should be tested in __main__ of nltk.tbl.demo")
def test_brill_demo(self):
demo()

View File

@@ -0,0 +1,39 @@
import unittest
import pytest
from nltk import ConditionalFreqDist, tokenize
class TestEmptyCondFreq(unittest.TestCase):
def test_tabulate(self):
empty = ConditionalFreqDist()
self.assertEqual(empty.conditions(), [])
with pytest.raises(ValueError):
empty.tabulate(conditions="BUG") # nonexistent keys shouldn't be added
self.assertEqual(empty.conditions(), [])
def test_plot(self):
empty = ConditionalFreqDist()
self.assertEqual(empty.conditions(), [])
empty.plot(conditions=["BUG"]) # nonexistent keys shouldn't be added
self.assertEqual(empty.conditions(), [])
def test_increment(self):
# make sure that we can still mutate cfd normally
text = "cow cat mouse cat tiger"
cfd = ConditionalFreqDist()
# create cfd with word length as condition
for word in tokenize.word_tokenize(text):
condition = len(word)
cfd[condition][word] += 1
self.assertEqual(cfd.conditions(), [3, 5])
# incrementing previously unseen key is still possible
cfd[2]["hi"] += 1
self.assertCountEqual(cfd.conditions(), [3, 5, 2]) # new condition added
self.assertEqual(
cfd[2]["hi"], 1
) # key's frequency incremented from 0 (unseen) to 1

View File

@@ -0,0 +1,49 @@
import unittest
import nltk
from nltk.grammar import CFG
class ChomskyNormalFormForCFGTest(unittest.TestCase):
def test_simple(self):
grammar = CFG.fromstring(
"""
S -> NP VP
PP -> P NP
NP -> Det N | NP PP P
VP -> V NP | VP PP
VP -> Det
Det -> 'a' | 'the'
N -> 'dog' | 'cat'
V -> 'chased' | 'sat'
P -> 'on' | 'in'
"""
)
self.assertFalse(grammar.is_flexible_chomsky_normal_form())
self.assertFalse(grammar.is_chomsky_normal_form())
grammar = grammar.chomsky_normal_form(flexible=True)
self.assertTrue(grammar.is_flexible_chomsky_normal_form())
self.assertFalse(grammar.is_chomsky_normal_form())
grammar2 = CFG.fromstring(
"""
S -> NP VP
NP -> VP N P
VP -> P
N -> 'dog' | 'cat'
P -> 'on' | 'in'
"""
)
self.assertFalse(grammar2.is_flexible_chomsky_normal_form())
self.assertFalse(grammar2.is_chomsky_normal_form())
grammar2 = grammar2.chomsky_normal_form()
self.assertTrue(grammar2.is_flexible_chomsky_normal_form())
self.assertTrue(grammar2.is_chomsky_normal_form())
def test_complex(self):
grammar = nltk.data.load("grammars/large_grammars/atis.cfg")
self.assertFalse(grammar.is_flexible_chomsky_normal_form())
self.assertFalse(grammar.is_chomsky_normal_form())
grammar = grammar.chomsky_normal_form(flexible=True)
self.assertTrue(grammar.is_flexible_chomsky_normal_form())
self.assertFalse(grammar.is_chomsky_normal_form())

View File

@@ -0,0 +1,85 @@
import unittest
from nltk import RegexpParser
class TestChunkRule(unittest.TestCase):
def test_tag_pattern2re_pattern_quantifier(self):
"""Test for bug https://github.com/nltk/nltk/issues/1597
Ensures that curly bracket quantifiers can be used inside a chunk rule.
This type of quantifier has been used for the supplementary example
in https://www.nltk.org/book/ch07.html#exploring-text-corpora.
"""
sent = [
("The", "AT"),
("September-October", "NP"),
("term", "NN"),
("jury", "NN"),
("had", "HVD"),
("been", "BEN"),
("charged", "VBN"),
("by", "IN"),
("Fulton", "NP-TL"),
("Superior", "JJ-TL"),
("Court", "NN-TL"),
("Judge", "NN-TL"),
("Durwood", "NP"),
("Pye", "NP"),
("to", "TO"),
("investigate", "VB"),
("reports", "NNS"),
("of", "IN"),
("possible", "JJ"),
("``", "``"),
("irregularities", "NNS"),
("''", "''"),
("in", "IN"),
("the", "AT"),
("hard-fought", "JJ"),
("primary", "NN"),
("which", "WDT"),
("was", "BEDZ"),
("won", "VBN"),
("by", "IN"),
("Mayor-nominate", "NN-TL"),
("Ivan", "NP"),
("Allen", "NP"),
("Jr.", "NP"),
(".", "."),
] # source: brown corpus
cp = RegexpParser("CHUNK: {<N.*>{4,}}")
tree = cp.parse(sent)
assert (
tree.pformat()
== """(S
The/AT
September-October/NP
term/NN
jury/NN
had/HVD
been/BEN
charged/VBN
by/IN
Fulton/NP-TL
Superior/JJ-TL
(CHUNK Court/NN-TL Judge/NN-TL Durwood/NP Pye/NP)
to/TO
investigate/VB
reports/NNS
of/IN
possible/JJ
``/``
irregularities/NNS
''/''
in/IN
the/AT
hard-fought/JJ
primary/NN
which/WDT
was/BEDZ
won/VBN
by/IN
(CHUNK Mayor-nominate/NN-TL Ivan/NP Allen/NP Jr./NP)
./.)"""
)

View File

@@ -0,0 +1,50 @@
"""
Unit tests for nltk.classify. See also: nltk/test/classify.doctest
"""
import pytest
from nltk import classify
TRAIN = [
(dict(a=1, b=1, c=1), "y"),
(dict(a=1, b=1, c=1), "x"),
(dict(a=1, b=1, c=0), "y"),
(dict(a=0, b=1, c=1), "x"),
(dict(a=0, b=1, c=1), "y"),
(dict(a=0, b=0, c=1), "y"),
(dict(a=0, b=1, c=0), "x"),
(dict(a=0, b=0, c=0), "x"),
(dict(a=0, b=1, c=1), "y"),
]
TEST = [
(dict(a=1, b=0, c=1)), # unseen
(dict(a=1, b=0, c=0)), # unseen
(dict(a=0, b=1, c=1)), # seen 3 times, labels=y,y,x
(dict(a=0, b=1, c=0)), # seen 1 time, label=x
]
RESULTS = [(0.16, 0.84), (0.46, 0.54), (0.41, 0.59), (0.76, 0.24)]
def assert_classifier_correct(algorithm):
try:
classifier = classify.MaxentClassifier.train(
TRAIN, algorithm, trace=0, max_iter=1000
)
except (LookupError, AttributeError) as e:
pytest.skip(str(e))
for (px, py), featureset in zip(RESULTS, TEST):
pdist = classifier.prob_classify(featureset)
assert abs(pdist.prob("x") - px) < 1e-2, (pdist.prob("x"), px)
assert abs(pdist.prob("y") - py) < 1e-2, (pdist.prob("y"), py)
def test_megam():
assert_classifier_correct("MEGAM")
def test_tadm():
assert_classifier_correct("TADM")
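The MEGAM and TADM checks above need external binaries; a small sketch of the same prob_classify API using the pure-Python NaiveBayesClassifier (illustrative only, its probabilities differ from the RESULTS table):

from nltk.classify import NaiveBayesClassifier

nb = NaiveBayesClassifier.train(TRAIN)
dist = nb.prob_classify(dict(a=0, b=1, c=0))  # seen once in training, labelled "x"
assert abs(dist.prob("x") + dist.prob("y") - 1.0) < 1e-6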

View File

@@ -0,0 +1,120 @@
from nltk.collocations import BigramCollocationFinder
from nltk.metrics import BigramAssocMeasures
## Test bigram counters with discontinuous bigrams and repeated words
_EPSILON = 1e-8
SENT = "this this is is a a test test".split()
def close_enough(x, y):
"""Verify that two sequences of n-gram association values are within
_EPSILON of each other.
"""
return all(abs(x1[1] - y1[1]) <= _EPSILON for x1, y1 in zip(x, y))
def test_bigram2():
b = BigramCollocationFinder.from_words(SENT)
assert sorted(b.ngram_fd.items()) == [
(("a", "a"), 1),
(("a", "test"), 1),
(("is", "a"), 1),
(("is", "is"), 1),
(("test", "test"), 1),
(("this", "is"), 1),
(("this", "this"), 1),
]
assert sorted(b.word_fd.items()) == [("a", 2), ("is", 2), ("test", 2), ("this", 2)]
assert len(SENT) == sum(b.word_fd.values()) == sum(b.ngram_fd.values()) + 1
assert close_enough(
sorted(b.score_ngrams(BigramAssocMeasures.pmi)),
[
(("a", "a"), 1.0),
(("a", "test"), 1.0),
(("is", "a"), 1.0),
(("is", "is"), 1.0),
(("test", "test"), 1.0),
(("this", "is"), 1.0),
(("this", "this"), 1.0),
],
)
def test_bigram3():
b = BigramCollocationFinder.from_words(SENT, window_size=3)
assert sorted(b.ngram_fd.items()) == sorted(
[
(("a", "test"), 3),
(("is", "a"), 3),
(("this", "is"), 3),
(("a", "a"), 1),
(("is", "is"), 1),
(("test", "test"), 1),
(("this", "this"), 1),
]
)
assert sorted(b.word_fd.items()) == sorted(
[("a", 2), ("is", 2), ("test", 2), ("this", 2)]
)
assert (
len(SENT) == sum(b.word_fd.values()) == (sum(b.ngram_fd.values()) + 2 + 1) / 2.0
)
assert close_enough(
sorted(b.score_ngrams(BigramAssocMeasures.pmi)),
sorted(
[
(("a", "test"), 1.584962500721156),
(("is", "a"), 1.584962500721156),
(("this", "is"), 1.584962500721156),
(("a", "a"), 0.0),
(("is", "is"), 0.0),
(("test", "test"), 0.0),
(("this", "this"), 0.0),
]
),
)
def test_bigram5():
b = BigramCollocationFinder.from_words(SENT, window_size=5)
assert sorted(b.ngram_fd.items()) == sorted(
[
(("a", "test"), 4),
(("is", "a"), 4),
(("this", "is"), 4),
(("is", "test"), 3),
(("this", "a"), 3),
(("a", "a"), 1),
(("is", "is"), 1),
(("test", "test"), 1),
(("this", "this"), 1),
]
)
assert sorted(b.word_fd.items()) == sorted(
[("a", 2), ("is", 2), ("test", 2), ("this", 2)]
)
n_word_fd = sum(b.word_fd.values())
n_ngram_fd = (sum(b.ngram_fd.values()) + 4 + 3 + 2 + 1) / 4.0
assert len(SENT) == n_word_fd == n_ngram_fd
assert close_enough(
sorted(b.score_ngrams(BigramAssocMeasures.pmi)),
sorted(
[
(("a", "test"), 1.0),
(("is", "a"), 1.0),
(("this", "is"), 1.0),
(("is", "test"), 0.5849625007211562),
(("this", "a"), 0.5849625007211562),
(("a", "a"), -1.0),
(("is", "is"), -1.0),
(("test", "test"), -1.0),
(("this", "this"), -1.0),
]
),
)
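A worked instance of the PMI arithmetic behind test_bigram2 (illustrative): with window_size=2 every observed bigram occurs once, every word occurs twice, and the word total is 8, so pmi = log2(n_ii * N / (n_ix * n_xi)) = log2(1 * 8 / (2 * 2)) = 1.0, which is why all seven scores above equal 1.0.

import math

assert math.log2(1 * 8 / (2 * 2)) == 1.0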

View File

@@ -0,0 +1,98 @@
import contextlib
import sys
import unittest
from io import StringIO
from nltk.corpus import gutenberg
from nltk.text import Text
@contextlib.contextmanager
def stdout_redirect(where):
sys.stdout = where
try:
yield where
finally:
sys.stdout = sys.__stdout__
class TestConcordance(unittest.TestCase):
"""Text constructed using: https://www.nltk.org/book/ch01.html"""
@classmethod
def setUpClass(cls):
cls.corpus = gutenberg.words("melville-moby_dick.txt")
@classmethod
def tearDownClass(cls):
pass
def setUp(self):
self.text = Text(TestConcordance.corpus)
self.query = "monstrous"
self.maxDiff = None
self.list_out = [
"ong the former , one was of a most monstrous size . ... This came towards us , ",
'ON OF THE PSALMS . " Touching that monstrous bulk of the whale or ork we have r',
"ll over with a heathenish array of monstrous clubs and spears . Some were thick",
"d as you gazed , and wondered what monstrous cannibal and savage could ever hav",
"that has survived the flood ; most monstrous and most mountainous ! That Himmal",
"they might scout at Moby Dick as a monstrous fable , or still worse and more de",
"th of Radney .'\" CHAPTER 55 Of the Monstrous Pictures of Whales . I shall ere l",
"ing Scenes . In connexion with the monstrous pictures of whales , I am strongly",
"ere to enter upon those still more monstrous stories of them which are to be fo",
"ght have been rummaged out of this monstrous cabinet there is no telling . But ",
"of Whale - Bones ; for Whales of a monstrous size are oftentimes cast up dead u",
]
def tearDown(self):
pass
def test_concordance_list(self):
concordance_out = self.text.concordance_list(self.query)
self.assertEqual(self.list_out, [c.line for c in concordance_out])
def test_concordance_width(self):
list_out = [
"monstrous",
"monstrous",
"monstrous",
"monstrous",
"monstrous",
"monstrous",
"Monstrous",
"monstrous",
"monstrous",
"monstrous",
"monstrous",
]
concordance_out = self.text.concordance_list(self.query, width=0)
self.assertEqual(list_out, [c.query for c in concordance_out])
def test_concordance_lines(self):
concordance_out = self.text.concordance_list(self.query, lines=3)
self.assertEqual(self.list_out[:3], [c.line for c in concordance_out])
def test_concordance_print(self):
print_out = """Displaying 11 of 11 matches:
ong the former , one was of a most monstrous size . ... This came towards us ,
ON OF THE PSALMS . " Touching that monstrous bulk of the whale or ork we have r
ll over with a heathenish array of monstrous clubs and spears . Some were thick
d as you gazed , and wondered what monstrous cannibal and savage could ever hav
that has survived the flood ; most monstrous and most mountainous ! That Himmal
they might scout at Moby Dick as a monstrous fable , or still worse and more de
th of Radney .'" CHAPTER 55 Of the Monstrous Pictures of Whales . I shall ere l
ing Scenes . In connexion with the monstrous pictures of whales , I am strongly
ere to enter upon those still more monstrous stories of them which are to be fo
ght have been rummaged out of this monstrous cabinet there is no telling . But
of Whale - Bones ; for Whales of a monstrous size are oftentimes cast up dead u
"""
with stdout_redirect(StringIO()) as stdout:
self.text.concordance(self.query)
def strip_space(raw_str):
return raw_str.replace(" ", "")
self.assertEqual(strip_space(print_out), strip_space(stdout.getvalue()))

File diff suppressed because it is too large

View File

@@ -0,0 +1,275 @@
import unittest
import pytest
from nltk.corpus import ( # mwa_ppdb
cess_cat,
cess_esp,
conll2007,
floresta,
indian,
ptb,
sinica_treebank,
udhr,
)
from nltk.tree import Tree
class TestUdhr(unittest.TestCase):
def test_words(self):
for name in udhr.fileids():
words = list(udhr.words(name))
self.assertTrue(words)
def test_raw_unicode(self):
for name in udhr.fileids():
txt = udhr.raw(name)
assert not isinstance(txt, bytes), name
def test_polish_encoding(self):
text_pl = udhr.raw("Polish-Latin2")[:164]
text_ppl = udhr.raw("Polish_Polski-Latin2")[:164]
expected = """POWSZECHNA DEKLARACJA PRAW CZŁOWIEKA
[Preamble]
Trzecia Sesja Ogólnego Zgromadzenia ONZ, obradująca w Paryżu, \
uchwaliła 10 grudnia 1948 roku jednomyślnie Powszechną"""
assert text_pl == expected, "Polish-Latin2"
assert text_ppl == expected, "Polish_Polski-Latin2"
class TestIndian(unittest.TestCase):
def test_words(self):
words = indian.words()[:3]
self.assertEqual(words, ["মহিষের", "সন্তান", ":"])
def test_tagged_words(self):
tagged_words = indian.tagged_words()[:3]
self.assertEqual(
tagged_words, [("মহিষের", "NN"), ("সন্তান", "NN"), (":", "SYM")]
)
class TestCess(unittest.TestCase):
def test_catalan(self):
words = cess_cat.words()[:15]
txt = "El Tribunal_Suprem -Fpa- TS -Fpt- ha confirmat la condemna a quatre anys d' inhabilitació especial"
self.assertEqual(words, txt.split())
self.assertEqual(cess_cat.tagged_sents()[0][34][0], "càrrecs")
def test_esp(self):
words = cess_esp.words()[:15]
txt = "El grupo estatal Electricité_de_France -Fpa- EDF -Fpt- anunció hoy , jueves , la compra del"
self.assertEqual(words, txt.split())
self.assertEqual(cess_esp.words()[115], "años")
class TestFloresta(unittest.TestCase):
def test_words(self):
words = floresta.words()[:10]
txt = "Um revivalismo refrescante O 7_e_Meio é um ex-libris de a"
self.assertEqual(words, txt.split())
class TestSinicaTreebank(unittest.TestCase):
def test_sents(self):
first_3_sents = sinica_treebank.sents()[:3]
self.assertEqual(
first_3_sents,
[["一"], ["友情"], ["嘉珍", "和", "我", "住在", "同一條", "巷子"]],
)
def test_parsed_sents(self):
parsed_sents = sinica_treebank.parsed_sents()[25]
self.assertEqual(
parsed_sents,
Tree(
"S",
[
Tree("NP", [Tree("Nba", ["嘉珍"])]),
Tree("V‧地", [Tree("VA11", ["不停"]), Tree("DE", [""])]),
Tree("VA4", ["哭泣"]),
],
),
)
class TestCoNLL2007(unittest.TestCase):
# Reading the CoNLL 2007 Dependency Treebanks
def test_sents(self):
sents = conll2007.sents("esp.train")[0]
self.assertEqual(
sents[:6], ["El", "aumento", "del", "índice", "de", "desempleo"]
)
def test_parsed_sents(self):
parsed_sents = conll2007.parsed_sents("esp.train")[0]
self.assertEqual(
parsed_sents.tree(),
Tree(
"fortaleció",
[
Tree(
"aumento",
[
"El",
Tree(
"del",
[
Tree(
"índice",
[
Tree(
"de",
[Tree("desempleo", ["estadounidense"])],
)
],
)
],
),
],
),
"hoy",
"considerablemente",
Tree(
"al",
[
Tree(
"euro",
[
Tree(
"cotizaba",
[
",",
"que",
Tree("a", [Tree("15.35", ["las", "GMT"])]),
"se",
Tree(
"en",
[
Tree(
"mercado",
[
"el",
Tree("de", ["divisas"]),
Tree("de", ["Fráncfort"]),
],
)
],
),
Tree("a", ["0,9452_dólares"]),
Tree(
"frente_a",
[
",",
Tree(
"0,9349_dólares",
[
"los",
Tree(
"de",
[
Tree(
"mañana",
["esta"],
)
],
),
],
),
],
),
],
)
],
)
],
),
".",
],
),
)
@pytest.mark.skipif(
not ptb.fileids(),
reason="A full installation of the Penn Treebank is not available",
)
class TestPTB(unittest.TestCase):
def test_fileids(self):
self.assertEqual(
ptb.fileids()[:4],
[
"BROWN/CF/CF01.MRG",
"BROWN/CF/CF02.MRG",
"BROWN/CF/CF03.MRG",
"BROWN/CF/CF04.MRG",
],
)
def test_words(self):
self.assertEqual(
ptb.words("WSJ/00/WSJ_0003.MRG")[:7],
["A", "form", "of", "asbestos", "once", "used", "*"],
)
def test_tagged_words(self):
self.assertEqual(
ptb.tagged_words("WSJ/00/WSJ_0003.MRG")[:3],
[("A", "DT"), ("form", "NN"), ("of", "IN")],
)
def test_categories(self):
self.assertEqual(
ptb.categories(),
[
"adventure",
"belles_lettres",
"fiction",
"humor",
"lore",
"mystery",
"news",
"romance",
"science_fiction",
],
)
def test_news_fileids(self):
self.assertEqual(
ptb.fileids("news")[:3],
["WSJ/00/WSJ_0001.MRG", "WSJ/00/WSJ_0002.MRG", "WSJ/00/WSJ_0003.MRG"],
)
def test_category_words(self):
self.assertEqual(
ptb.words(categories=["humor", "fiction"])[:6],
["Thirty-three", "Scotty", "did", "not", "go", "back"],
)
@pytest.mark.skip("Skipping test for mwa_ppdb.")
class TestMWAPPDB(unittest.TestCase):
def test_fileids(self):
self.assertEqual(
mwa_ppdb.fileids(), ["ppdb-1.0-xxxl-lexical.extended.synonyms.uniquepairs"]
)
def test_entries(self):
self.assertEqual(
mwa_ppdb.entries()[:10],
[
("10/17/01", "17/10/2001"),
("102,70", "102.70"),
("13,53", "13.53"),
("3.2.5.3.2.1", "3.2.5.3.2.1."),
("53,76", "53.76"),
("6.9.5", "6.9.5."),
("7.7.6.3", "7.7.6.3."),
("76,20", "76.20"),
("79,85", "79.85"),
("93,65", "93.65"),
],
)

View File

@@ -0,0 +1,48 @@
"""
Corpus View Regression Tests
"""
import unittest
import nltk.data
from nltk.corpus.reader.util import (
StreamBackedCorpusView,
read_line_block,
read_whitespace_block,
)
class TestCorpusViews(unittest.TestCase):
linetok = nltk.LineTokenizer(blanklines="keep")
names = [
"corpora/inaugural/README", # A very short file (160 chars)
"corpora/inaugural/1793-Washington.txt", # A relatively short file (791 chars)
"corpora/inaugural/1909-Taft.txt", # A longer file (32k chars)
]
def data(self):
for name in self.names:
f = nltk.data.find(name)
with f.open() as fp:
file_data = fp.read().decode("utf8")
yield f, file_data
def test_correct_values(self):
# Check that corpus views produce the correct sequence of values.
for f, file_data in self.data():
v = StreamBackedCorpusView(f, read_whitespace_block)
self.assertEqual(list(v), file_data.split())
v = StreamBackedCorpusView(f, read_line_block)
self.assertEqual(list(v), self.linetok.tokenize(file_data))
def test_correct_length(self):
# Check that the corpus views report the correct lengths:
for f, file_data in self.data():
v = StreamBackedCorpusView(f, read_whitespace_block)
self.assertEqual(len(v), len(file_data.split()))
v = StreamBackedCorpusView(f, read_line_block)
self.assertEqual(len(v), len(self.linetok.tokenize(file_data)))

View File

@@ -0,0 +1,15 @@
import pytest
import nltk.data
def test_find_raises_exception():
with pytest.raises(LookupError):
nltk.data.find("no_such_resource/foo")
def test_find_raises_exception_with_full_resource_name():
no_such_thing = "no_such_thing/bar"
with pytest.raises(LookupError) as exc:
nltk.data.find(no_such_thing)
assert no_such_thing in str(exc)

View File

@@ -0,0 +1,160 @@
import unittest
from nltk.metrics.agreement import AnnotationTask
class TestDisagreement(unittest.TestCase):
"""
Class containing unit tests for nltk.metrics.agreement.Disagreement.
"""
def test_easy(self):
"""
Simple test, based on
https://github.com/foolswood/krippendorffs_alpha/raw/master/krippendorff.pdf.
"""
data = [
("coder1", "dress1", "YES"),
("coder2", "dress1", "NO"),
("coder3", "dress1", "NO"),
("coder1", "dress2", "YES"),
("coder2", "dress2", "NO"),
("coder3", "dress3", "NO"),
]
annotation_task = AnnotationTask(data)
self.assertAlmostEqual(annotation_task.alpha(), -0.3333333)
def test_easy2(self):
"""
Same simple test with 1 rating removed.
Removal of that rating should not matter: K-Alpha ignores items with
only 1 rating.
"""
data = [
("coder1", "dress1", "YES"),
("coder2", "dress1", "NO"),
("coder3", "dress1", "NO"),
("coder1", "dress2", "YES"),
("coder2", "dress2", "NO"),
]
annotation_task = AnnotationTask(data)
self.assertAlmostEqual(annotation_task.alpha(), -0.3333333)
def test_easy3(self):
"""
If expected disagreement is 0, K-Alpha should be 1.
"""
data = [
("coder1", "1", 1),
("coder2", "1", 1),
("coder1", "2", 2),
("coder2", "2", 2),
]
annotation_task = AnnotationTask(data)
self.assertAlmostEqual(annotation_task.alpha(), 1.0)
data = [("coder1", "1", 1), ("coder2", "1", 1), ("coder1", "2", 2)]
annotation_task = AnnotationTask(data)
self.assertAlmostEqual(annotation_task.alpha(), 1.0)
def test_advanced(self):
"""
More advanced test, based on
http://www.agreestat.com/research_papers/onkrippendorffalpha.pdf
"""
data = [
("A", "1", "1"),
("B", "1", "1"),
("D", "1", "1"),
("A", "2", "2"),
("B", "2", "2"),
("C", "2", "3"),
("D", "2", "2"),
("A", "3", "3"),
("B", "3", "3"),
("C", "3", "3"),
("D", "3", "3"),
("A", "4", "3"),
("B", "4", "3"),
("C", "4", "3"),
("D", "4", "3"),
("A", "5", "2"),
("B", "5", "2"),
("C", "5", "2"),
("D", "5", "2"),
("A", "6", "1"),
("B", "6", "2"),
("C", "6", "3"),
("D", "6", "4"),
("A", "7", "4"),
("B", "7", "4"),
("C", "7", "4"),
("D", "7", "4"),
("A", "8", "1"),
("B", "8", "1"),
("C", "8", "2"),
("D", "8", "1"),
("A", "9", "2"),
("B", "9", "2"),
("C", "9", "2"),
("D", "9", "2"),
("B", "10", "5"),
("C", "10", "5"),
("D", "10", "5"),
("C", "11", "1"),
("D", "11", "1"),
("C", "12", "3"),
]
annotation_task = AnnotationTask(data)
self.assertAlmostEqual(annotation_task.alpha(), 0.743421052632)
def test_advanced2(self):
"""
Same more advanced example, but with 1 rating removed.
Again, removal of that 1 rating should not matter.
"""
data = [
("A", "1", "1"),
("B", "1", "1"),
("D", "1", "1"),
("A", "2", "2"),
("B", "2", "2"),
("C", "2", "3"),
("D", "2", "2"),
("A", "3", "3"),
("B", "3", "3"),
("C", "3", "3"),
("D", "3", "3"),
("A", "4", "3"),
("B", "4", "3"),
("C", "4", "3"),
("D", "4", "3"),
("A", "5", "2"),
("B", "5", "2"),
("C", "5", "2"),
("D", "5", "2"),
("A", "6", "1"),
("B", "6", "2"),
("C", "6", "3"),
("D", "6", "4"),
("A", "7", "4"),
("B", "7", "4"),
("C", "7", "4"),
("D", "7", "4"),
("A", "8", "1"),
("B", "8", "1"),
("C", "8", "2"),
("D", "8", "1"),
("A", "9", "2"),
("B", "9", "2"),
("C", "9", "2"),
("D", "9", "2"),
("B", "10", "5"),
("C", "10", "5"),
("D", "10", "5"),
("C", "11", "1"),
("D", "11", "1"),
("C", "12", "3"),
]
annotation_task = AnnotationTask(data)
self.assertAlmostEqual(annotation_task.alpha(), 0.743421052632)
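A minimal usage sketch of the statistic under test (illustrative): Krippendorff's alpha is 1 - D_o / D_e (observed over expected disagreement), so perfect agreement yields 1.0 and the small "dress" example above comes out at -1/3.

from nltk.metrics.agreement import AnnotationTask

task = AnnotationTask([
    ("coder1", "dress1", "YES"),
    ("coder2", "dress1", "NO"),
    ("coder3", "dress1", "NO"),
    ("coder1", "dress2", "YES"),
    ("coder2", "dress2", "NO"),
])
assert round(task.alpha(), 4) == -0.3333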

View File

@@ -0,0 +1,129 @@
from typing import Tuple
import pytest
from nltk.metrics.distance import edit_distance
class TestEditDistance:
@pytest.mark.parametrize(
"left,right,substitution_cost,expecteds",
[
# Allowing transpositions reduces the number of edits required.
# with transpositions:
# e.g. "abc" -T-> "cba" -D-> "ca": 2 steps
#
# without transpositions:
# e.g. "abc" -D-> "ab" -D-> "a" -I-> "ca": 3 steps
("abc", "ca", 1, (2, 3)),
("abc", "ca", 5, (2, 3)), # Doesn't *require* substitutions
# Note, a substitution_cost of higher than 2 doesn't make much
# sense, as a deletion + insertion is identical, and always
# costs 2.
#
#
# Transpositions don't always reduce the number of edits required:
# with or without transpositions:
# e.g. "wants" -D-> "wats" -D-> "was" -I-> "wasp": 3 steps
("wants", "wasp", 1, (3, 3)),
("wants", "wasp", 5, (3, 3)), # Doesn't *require* substitutions
#
#
# Ought to have the same results with and without transpositions
# with or without transpositions:
# e.g. "rain" -S-> "sain" -S-> "shin" -I-> "shine": 3 steps
# (but cost 5 if substitution_cost=2)
("rain", "shine", 1, (3, 3)),
("rain", "shine", 2, (5, 5)), # Does *require* substitutions
#
#
# Several potentially interesting typos
# with transpositions:
# e.g. "acbdef" -T-> "abcdef": 1 step
#
# without transpositions:
# e.g. "acbdef" -D-> "abdef" -I-> "abcdef": 2 steps
("acbdef", "abcdef", 1, (1, 2)),
("acbdef", "abcdef", 2, (1, 2)), # Doesn't *require* substitutions
#
#
# with transpositions:
# e.g. "lnaguaeg" -T-> "languaeg" -T-> "language": 2 steps
#
# without transpositions:
# e.g. "lnaguaeg" -D-> "laguaeg" -I-> "languaeg" -D-> "languag" -I-> "language": 4 steps
("lnaguaeg", "language", 1, (2, 4)),
("lnaguaeg", "language", 2, (2, 4)), # Doesn't *require* substitutions
#
#
# with transpositions:
# e.g. "lnaugage" -T-> "lanugage" -T-> "language": 2 steps
#
# without transpositions:
# e.g. "lnaugage" -S-> "lnangage" -D-> "langage" -I-> "language": 3 steps
# (but one substitution, so a cost of 4 if substitution_cost = 2)
("lnaugage", "language", 1, (2, 3)),
("lnaugage", "language", 2, (2, 4)),
# Does *require* substitutions if no transpositions
#
#
# with transpositions:
# e.g. "lngauage" -T-> "lnaguage" -T-> "language": 2 steps
# without transpositions:
# e.g. "lngauage" -I-> "lanaguage" -D-> "language": 2 steps
("lngauage", "language", 1, (2, 2)),
("lngauage", "language", 2, (2, 2)), # Doesn't *require* substitutions
#
#
# with or without transpositions:
# e.g. "wants" -S-> "sants" -S-> "swnts" -S-> "swits" -S-> "swims" -D-> "swim": 5 steps
#
# with substitution_cost=2 and transpositions:
# e.g. "wants" -T-> "santw" -D-> "sntw" -D-> "stw" -D-> "sw"
# -I-> "swi" -I-> "swim": 6 steps
#
# with substitution_cost=2 and no transpositions:
# e.g. "wants" -I-> "swants" -D-> "swant" -D-> "swan" -D-> "swa" -D-> "sw"
# -I-> "swi" -I-> "swim": 7 steps
("wants", "swim", 1, (5, 5)),
("wants", "swim", 2, (6, 7)),
#
#
# with or without transpositions:
# e.g. "kitten" -S-> "sitten" -s-> "sittin" -I-> "sitting": 3 steps
# (but cost 5 if substitution_cost=2)
("kitten", "sitting", 1, (3, 3)),
("kitten", "sitting", 2, (5, 5)),
#
# duplicated letter
# e.g. "duplicated" -I-> "duuplicated": 1 step
("duplicated", "duuplicated", 1, (1, 1)),
("duplicated", "duuplicated", 2, (1, 1)),
("very duplicated", "very duuplicateed", 2, (2, 2)),
],
)
def test_with_transpositions(
self, left: str, right: str, substitution_cost: int, expecteds: Tuple[int, int]
):
"""
Test `edit_distance` between two strings, given some `substitution_cost`,
and whether transpositions are allowed.
:param str left: First input string to `edit_distance`.
:param str right: Second input string to `edit_distance`.
:param int substitution_cost: The cost of a substitution action in `edit_distance`.
:param Tuple[int, int] expecteds: A tuple of expected outputs, such that `expecteds[0]` is
the expected output with `transpositions=True`, and `expecteds[1]` is
the expected output with `transpositions=False`.
"""
# Test the input strings in both orderings
for s1, s2 in ((left, right), (right, left)):
# zip with [True, False] to get the transpositions value
for expected, transpositions in zip(expecteds, [True, False]):
predicted = edit_distance(
s1,
s2,
substitution_cost=substitution_cost,
transpositions=transpositions,
)
assert predicted == expected
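A direct call mirroring one row of the table above (illustrative): the single adjacent swap in "acbdef" is one edit when transpositions are allowed, and a deletion plus an insertion otherwise.

from nltk.metrics.distance import edit_distance

assert edit_distance("acbdef", "abcdef", transpositions=True) == 1
assert edit_distance("acbdef", "abcdef", transpositions=False) == 2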

View File

@@ -0,0 +1,83 @@
import os
import shutil
import unittest.mock
from nltk import download
from nltk.downloader import build_index
def test_downloader_using_existing_parent_download_dir(tmp_path):
"""Test that download works properly when the parent folder of the download_dir exists"""
download_dir = str(tmp_path.joinpath("another_dir"))
download_status = download("mwa_ppdb", download_dir)
assert download_status is True
def test_downloader_using_non_existing_parent_download_dir(tmp_path):
"""Test that download works properly when the parent folder of the download_dir does not exist"""
download_dir = str(
tmp_path.joinpath("non-existing-parent-folder", "another-non-existing-folder")
)
download_status = download("mwa_ppdb", download_dir)
assert download_status is True
def test_downloader_redownload(tmp_path):
"""Test that a second download correctly triggers the 'already up-to-date' message"""
first_download = 0
second_download = 1
download_dir = str(tmp_path.joinpath("test_repeat_download"))
for i in range(first_download, second_download + 1):
# capsys doesn't capture functools.partial stdout, which nltk.download.show uses, so just mock print
with unittest.mock.patch("builtins.print") as print_mock:
download_status = download("stopwords", download_dir)
assert download_status is True
if i == first_download:
expected_second_call = unittest.mock.call(
"[nltk_data] Unzipping %s."
% os.path.join("corpora", "stopwords.zip")
)
assert print_mock.call_args_list[1].args == expected_second_call.args
elif i == second_download:
expected_second_call = unittest.mock.call(
"[nltk_data] Package stopwords is already up-to-date!"
)
assert print_mock.call_args_list[1].args == expected_second_call.args
def test_build_index(tmp_path):
"""Test building index with both checksums."""
test_pkg_dir = str(tmp_path.joinpath("packages"))
test_pkg_name = "test_package"
test_pkg_path = os.path.join(test_pkg_dir, f"{test_pkg_name}")
os.makedirs(test_pkg_path, exist_ok=True)
test_xml_path = os.path.join(test_pkg_path, f"{test_pkg_name}.xml")
with open(test_xml_path, "w") as fi:
fi.write(
f'<package id="{test_pkg_name}" name="A Test Package" webpage="http://www.somefake.url/"'
' unzip="1"/>'
)
# Cannot mock a zip here as we are trying to validate file checksums, so just create a simple one with the XML
zip_path = os.path.join(test_pkg_path, f"{test_pkg_name}")
shutil.make_archive(
base_name=zip_path,
format="zip",
root_dir=test_pkg_dir,
base_dir=os.path.basename(test_pkg_path),
)
xml_index = build_index(
root=os.path.dirname(test_pkg_dir), base_url="https://someurl"
)
package_element = xml_index[0][0]
assert package_element.get("id") == "test_package"
md5_checksum = package_element.get("checksum")
assert isinstance(md5_checksum, str)
assert len(md5_checksum) > 5
sha256_checksum = package_element.get("sha256_checksum")
assert isinstance(sha256_checksum, str)
assert len(sha256_checksum) > 5

View File

@@ -0,0 +1,7 @@
import nltk
def test_iterating_returns_an_iterator_ordered_by_frequency():
samples = ["one", "two", "two"]
distribution = nltk.FreqDist(samples)
assert list(distribution) == ["two", "one"]

View File

@@ -0,0 +1,82 @@
import pytest
from nltk.tag import hmm
def _wikipedia_example_hmm():
# Example from wikipedia
# (https://en.wikipedia.org/wiki/Forward%E2%80%93backward_algorithm)
states = ["rain", "no rain"]
symbols = ["umbrella", "no umbrella"]
A = [[0.7, 0.3], [0.3, 0.7]] # transition probabilities
B = [[0.9, 0.1], [0.2, 0.8]] # emission probabilities
pi = [0.5, 0.5] # initial probabilities
seq = ["umbrella", "umbrella", "no umbrella", "umbrella", "umbrella"]
seq = list(zip(seq, [None] * len(seq)))
model = hmm._create_hmm_tagger(states, symbols, A, B, pi)
return model, states, symbols, seq
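# The assertions below exponentiate with 2 ** ... because the HMM code appears
# to work with log-base-2 probabilities internally; converting back to plain
# probabilities lets the matrices be compared against the published tables
# (Huang et al. and the Wikipedia article).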
def test_forward_probability():
from numpy.testing import assert_array_almost_equal
# example from p. 385, Huang et al
model, states, symbols = hmm._market_hmm_example()
seq = [("up", None), ("up", None)]
expected = [[0.35, 0.02, 0.09], [0.1792, 0.0085, 0.0357]]
fp = 2 ** model._forward_probability(seq)
assert_array_almost_equal(fp, expected)
def test_forward_probability2():
from numpy.testing import assert_array_almost_equal
model, states, symbols, seq = _wikipedia_example_hmm()
fp = 2 ** model._forward_probability(seq)
# examples in wikipedia are normalized
fp = (fp.T / fp.sum(axis=1)).T
wikipedia_results = [
[0.8182, 0.1818],
[0.8834, 0.1166],
[0.1907, 0.8093],
[0.7308, 0.2692],
[0.8673, 0.1327],
]
assert_array_almost_equal(wikipedia_results, fp, 4)
def test_backward_probability():
from numpy.testing import assert_array_almost_equal
model, states, symbols, seq = _wikipedia_example_hmm()
bp = 2 ** model._backward_probability(seq)
# examples in wikipedia are normalized
bp = (bp.T / bp.sum(axis=1)).T
wikipedia_results = [
# Forward-backward algorithm doesn't need b0_5,
# so .backward_probability doesn't compute it.
# [0.6469, 0.3531],
[0.5923, 0.4077],
[0.3763, 0.6237],
[0.6533, 0.3467],
[0.6273, 0.3727],
[0.5, 0.5],
]
assert_array_almost_equal(wikipedia_results, bp, 4)
def setup_module(module):
pytest.importorskip("numpy")

View File

@@ -0,0 +1,210 @@
# Natural Language Toolkit: Twitter client
#
# Copyright (C) 2001-2025 NLTK Project
# Author: Lorenzo Rubio <lrnzcig@gmail.com>
# URL: <https://www.nltk.org/>
# For license information, see LICENSE.TXT
"""
Regression tests for `json2csv()` and `json2csv_entities()` in the Twitter
package.
"""
from pathlib import Path
import pytest
from nltk.corpus import twitter_samples
from nltk.twitter.common import json2csv, json2csv_entities
def files_are_identical(pathA, pathB):
"""
Compare two files, ignoring carriage returns,
leading whitespace, and trailing whitespace
"""
f1 = [l.strip() for l in pathA.read_bytes().splitlines()]
f2 = [l.strip() for l in pathB.read_bytes().splitlines()]
return f1 == f2
subdir = Path(__file__).parent / "files"
@pytest.fixture
def infile():
with open(twitter_samples.abspath("tweets.20150430-223406.json")) as infile:
return [next(infile) for x in range(100)]
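# Each test below converts the first 100 tweets of the bundled twitter_samples
# corpus to CSV and compares the output line by line with a pre-generated .ref
# file in the `files` directory next to this module (see `subdir` above).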
def test_textoutput(tmp_path, infile):
ref_fn = subdir / "tweets.20150430-223406.text.csv.ref"
outfn = tmp_path / "tweets.20150430-223406.text.csv"
json2csv(infile, outfn, ["text"], gzip_compress=False)
assert files_are_identical(outfn, ref_fn)
def test_tweet_metadata(tmp_path, infile):
ref_fn = subdir / "tweets.20150430-223406.tweet.csv.ref"
fields = [
"created_at",
"favorite_count",
"id",
"in_reply_to_status_id",
"in_reply_to_user_id",
"retweet_count",
"retweeted",
"text",
"truncated",
"user.id",
]
outfn = tmp_path / "tweets.20150430-223406.tweet.csv"
json2csv(infile, outfn, fields, gzip_compress=False)
assert files_are_identical(outfn, ref_fn)
def test_user_metadata(tmp_path, infile):
ref_fn = subdir / "tweets.20150430-223406.user.csv.ref"
fields = ["id", "text", "user.id", "user.followers_count", "user.friends_count"]
outfn = tmp_path / "tweets.20150430-223406.user.csv"
json2csv(infile, outfn, fields, gzip_compress=False)
assert files_are_identical(outfn, ref_fn)
def test_tweet_hashtag(tmp_path, infile):
ref_fn = subdir / "tweets.20150430-223406.hashtag.csv.ref"
outfn = tmp_path / "tweets.20150430-223406.hashtag.csv"
json2csv_entities(
infile,
outfn,
["id", "text"],
"hashtags",
["text"],
gzip_compress=False,
)
assert files_are_identical(outfn, ref_fn)
def test_tweet_usermention(tmp_path, infile):
ref_fn = subdir / "tweets.20150430-223406.usermention.csv.ref"
outfn = tmp_path / "tweets.20150430-223406.usermention.csv"
json2csv_entities(
infile,
outfn,
["id", "text"],
"user_mentions",
["id", "screen_name"],
gzip_compress=False,
)
assert files_are_identical(outfn, ref_fn)
def test_tweet_media(tmp_path, infile):
ref_fn = subdir / "tweets.20150430-223406.media.csv.ref"
outfn = tmp_path / "tweets.20150430-223406.media.csv"
json2csv_entities(
infile,
outfn,
["id"],
"media",
["media_url", "url"],
gzip_compress=False,
)
assert files_are_identical(outfn, ref_fn)
def test_tweet_url(tmp_path, infile):
ref_fn = subdir / "tweets.20150430-223406.url.csv.ref"
outfn = tmp_path / "tweets.20150430-223406.url.csv"
json2csv_entities(
infile,
outfn,
["id"],
"urls",
["url", "expanded_url"],
gzip_compress=False,
)
assert files_are_identical(outfn, ref_fn)
def test_userurl(tmp_path, infile):
ref_fn = subdir / "tweets.20150430-223406.userurl.csv.ref"
outfn = tmp_path / "tweets.20150430-223406.userurl.csv"
json2csv_entities(
infile,
outfn,
["id", "screen_name"],
"user.urls",
["url", "expanded_url"],
gzip_compress=False,
)
assert files_are_identical(outfn, ref_fn)
def test_tweet_place(tmp_path, infile):
ref_fn = subdir / "tweets.20150430-223406.place.csv.ref"
outfn = tmp_path / "tweets.20150430-223406.place.csv"
json2csv_entities(
infile,
outfn,
["id", "text"],
"place",
["name", "country"],
gzip_compress=False,
)
assert files_are_identical(outfn, ref_fn)
def test_tweet_place_boundingbox(tmp_path, infile):
ref_fn = subdir / "tweets.20150430-223406.placeboundingbox.csv.ref"
outfn = tmp_path / "tweets.20150430-223406.placeboundingbox.csv"
json2csv_entities(
infile,
outfn,
["id", "name"],
"place.bounding_box",
["coordinates"],
gzip_compress=False,
)
assert files_are_identical(outfn, ref_fn)
def test_retweet_original_tweet(tmp_path, infile):
ref_fn = subdir / "tweets.20150430-223406.retweet.csv.ref"
outfn = tmp_path / "tweets.20150430-223406.retweet.csv"
json2csv_entities(
infile,
outfn,
["id"],
"retweeted_status",
[
"created_at",
"favorite_count",
"id",
"in_reply_to_status_id",
"in_reply_to_user_id",
"retweet_count",
"text",
"truncated",
"user.id",
],
gzip_compress=False,
)
assert files_are_identical(outfn, ref_fn)
def test_file_is_wrong(tmp_path, infile):
"""
Sanity check that file comparison is not giving false positives.
"""
ref_fn = subdir / "tweets.20150430-223406.retweet.csv.ref"
outfn = tmp_path / "tweets.20150430-223406.text.csv"
json2csv(infile, outfn, ["text"], gzip_compress=False)
assert not files_are_identical(outfn, ref_fn)

View File

@@ -0,0 +1,95 @@
import unittest
from nltk.corpus import brown
from nltk.jsontags import JSONTaggedDecoder, JSONTaggedEncoder
from nltk.tag import (
AffixTagger,
BigramTagger,
BrillTagger,
BrillTaggerTrainer,
DefaultTagger,
NgramTagger,
PerceptronTagger,
RegexpTagger,
TrigramTagger,
UnigramTagger,
)
from nltk.tag.brill import nltkdemo18
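# These tests round-trip each tagger through JSON (self.encoder.encode(...)
# followed by self.decoder.decode(...)) and then compare the repr() and the
# key attributes of the original tagger against the reconstructed copy.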
class TestJSONSerialization(unittest.TestCase):
def setUp(self):
self.corpus = brown.tagged_sents()[:35]
self.decoder = JSONTaggedDecoder()
self.encoder = JSONTaggedEncoder()
self.default_tagger = DefaultTagger("NN")
def test_default_tagger(self):
encoded = self.encoder.encode(self.default_tagger)
decoded = self.decoder.decode(encoded)
self.assertEqual(repr(self.default_tagger), repr(decoded))
self.assertEqual(self.default_tagger._tag, decoded._tag)
def test_regexp_tagger(self):
tagger = RegexpTagger([(r".*", "NN")], backoff=self.default_tagger)
encoded = self.encoder.encode(tagger)
decoded = self.decoder.decode(encoded)
self.assertEqual(repr(tagger), repr(decoded))
self.assertEqual(repr(tagger.backoff), repr(decoded.backoff))
self.assertEqual(tagger._regexps, decoded._regexps)
def test_affix_tagger(self):
tagger = AffixTagger(self.corpus, backoff=self.default_tagger)
encoded = self.encoder.encode(tagger)
decoded = self.decoder.decode(encoded)
self.assertEqual(repr(tagger), repr(decoded))
self.assertEqual(repr(tagger.backoff), repr(decoded.backoff))
self.assertEqual(tagger._affix_length, decoded._affix_length)
self.assertEqual(tagger._min_word_length, decoded._min_word_length)
self.assertEqual(tagger._context_to_tag, decoded._context_to_tag)
def test_ngram_taggers(self):
unitagger = UnigramTagger(self.corpus, backoff=self.default_tagger)
bitagger = BigramTagger(self.corpus, backoff=unitagger)
tritagger = TrigramTagger(self.corpus, backoff=bitagger)
ntagger = NgramTagger(4, self.corpus, backoff=tritagger)
encoded = self.encoder.encode(ntagger)
decoded = self.decoder.decode(encoded)
self.assertEqual(repr(ntagger), repr(decoded))
self.assertEqual(repr(tritagger), repr(decoded.backoff))
self.assertEqual(repr(bitagger), repr(decoded.backoff.backoff))
self.assertEqual(repr(unitagger), repr(decoded.backoff.backoff.backoff))
self.assertEqual(
repr(self.default_tagger), repr(decoded.backoff.backoff.backoff.backoff)
)
def test_perceptron_tagger(self):
tagger = PerceptronTagger(load=False)
tagger.train(self.corpus)
encoded = self.encoder.encode(tagger)
decoded = self.decoder.decode(encoded)
self.assertEqual(tagger.model.weights, decoded.model.weights)
self.assertEqual(tagger.tagdict, decoded.tagdict)
self.assertEqual(tagger.classes, decoded.classes)
def test_brill_tagger(self):
trainer = BrillTaggerTrainer(
self.default_tagger, nltkdemo18(), deterministic=True
)
tagger = trainer.train(self.corpus, max_rules=30)
encoded = self.encoder.encode(tagger)
decoded = self.decoder.decode(encoded)
self.assertEqual(repr(tagger._initial_tagger), repr(decoded._initial_tagger))
self.assertEqual(tagger._rules, decoded._rules)
self.assertEqual(tagger._training_stats, decoded._training_stats)

View File

@@ -0,0 +1,66 @@
import unittest
from nltk.metrics import (
BigramAssocMeasures,
QuadgramAssocMeasures,
TrigramAssocMeasures,
)
## Test the likelihood ratio metric
_DELTA = 1e-8
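# As called below, BigramAssocMeasures.likelihood_ratio takes
# (n_ii, (n_ix, n_xi), n_xx): the joint bigram count, the two marginal counts,
# and the total number of bigrams; the trigram and quadgram variants take
# correspondingly longer tuples of joint and marginal counts.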
class TestLikelihoodRatio(unittest.TestCase):
def test_lr_bigram(self):
self.assertAlmostEqual(
BigramAssocMeasures.likelihood_ratio(2, (4, 4), 20),
2.4142743368419755,
delta=_DELTA,
)
self.assertAlmostEqual(
BigramAssocMeasures.likelihood_ratio(1, (1, 1), 1), 0.0, delta=_DELTA
)
self.assertRaises(
ValueError,
BigramAssocMeasures.likelihood_ratio,
*(0, (2, 2), 2),
)
def test_lr_trigram(self):
self.assertAlmostEqual(
TrigramAssocMeasures.likelihood_ratio(1, (1, 1, 1), (1, 1, 1), 2),
5.545177444479562,
delta=_DELTA,
)
self.assertAlmostEqual(
TrigramAssocMeasures.likelihood_ratio(1, (1, 1, 1), (1, 1, 1), 1),
0.0,
delta=_DELTA,
)
self.assertRaises(
ValueError,
TrigramAssocMeasures.likelihood_ratio,
*(1, (1, 1, 2), (1, 1, 2), 2),
)
def test_lr_quadgram(self):
self.assertAlmostEqual(
QuadgramAssocMeasures.likelihood_ratio(
1, (1, 1, 1, 1), (1, 1, 1, 1, 1, 1), (1, 1, 1, 1), 2
),
8.317766166719343,
delta=_DELTA,
)
self.assertAlmostEqual(
QuadgramAssocMeasures.likelihood_ratio(
1, (1, 1, 1, 1), (1, 1, 1, 1, 1, 1), (1, 1, 1, 1), 1
),
0.0,
delta=_DELTA,
)
self.assertRaises(
ValueError,
QuadgramAssocMeasures.likelihood_ratio,
*(1, (1, 1, 1, 1), (1, 1, 1, 1, 1, 2), (1, 1, 1, 1), 1),
)

View File

@@ -0,0 +1,21 @@
import unittest
from nltk.classify.naivebayes import NaiveBayesClassifier
class NaiveBayesClassifierTest(unittest.TestCase):
def test_simple(self):
training_features = [
({"nice": True, "good": True}, "positive"),
({"bad": True, "mean": True}, "negative"),
]
classifier = NaiveBayesClassifier.train(training_features)
result = classifier.prob_classify({"nice": True})
self.assertTrue(result.prob("positive") > result.prob("negative"))
self.assertEqual(result.max(), "positive")
result = classifier.prob_classify({"bad": True})
self.assertTrue(result.prob("positive") < result.prob("negative"))
self.assertEqual(result.max(), "negative")

View File

@@ -0,0 +1,27 @@
"""
Unit tests for nltk.corpus.nombank
"""
import unittest
from nltk.corpus import nombank
# Load the nombank once.
nombank.nouns()
class NombankDemo(unittest.TestCase):
def test_numbers(self):
# No. of instances.
self.assertEqual(len(nombank.instances()), 114574)
# No. of rolesets
self.assertEqual(len(nombank.rolesets()), 5577)
# No. of nouns.
self.assertEqual(len(nombank.nouns()), 4704)
def test_instance(self):
self.assertEqual(nombank.instances()[0].roleset, "perc-sign.01")
def test_framefiles_fileids(self):
self.assertEqual(len(nombank.fileids()), 4705)
self.assertTrue(all(fileid.endswith(".xml") for fileid in nombank.fileids()))

View File

@@ -0,0 +1,13 @@
import unittest
import nltk
from nltk.corpus.reader import pl196x
class TestCorpusViews(unittest.TestCase):
def test_corpus_reader(self):
pl196x_dir = nltk.data.find("corpora/pl196x")
pl = pl196x.Pl196xCorpusReader(
pl196x_dir, r".*\.xml", textids="textids.txt", cat_file="cats.txt"
)
pl.tagged_words(fileids=pl.fileids(), categories="cats.txt")

View File

@@ -0,0 +1,117 @@
"""
Tests for nltk.pos_tag
"""
import io
import unittest
import unittest.mock
from nltk import pos_tag, word_tokenize
from nltk.help import brown_tagset, claws5_tagset, upenn_tagset
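# The constants below mirror the exact stdout produced by the nltk.help tagset
# query functions for the regex queries used in TestPosTag; the example word
# lines are indented by four spaces to match that output.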
UPENN_TAGSET_DOLLAR_TEST = """$: dollar
    $ -$ --$ A$ C$ HK$ M$ NZ$ S$ U.S.$ US$
PRP$: pronoun, possessive
    her his mine my our ours their thy your
WP$: WH-pronoun, possessive
    whose
"""
BROWN_TAGSET_NNS_TEST = """NNS: noun, plural, common
    irregularities presentments thanks reports voters laws legislators
    years areas adjustments chambers $100 bonds courts sales details raises
    sessions members congressmen votes polls calls ...
"""
CLAW5_TAGSET_VHD_TEST = """VHD: past tense form of the verb "HAVE"
    had, 'd
"""
class TestPosTag(unittest.TestCase):
def test_pos_tag_eng(self):
text = "John's big idea isn't all that bad."
expected_tagged = [
("John", "NNP"),
("'s", "POS"),
("big", "JJ"),
("idea", "NN"),
("is", "VBZ"),
("n't", "RB"),
("all", "PDT"),
("that", "DT"),
("bad", "JJ"),
(".", "."),
]
assert pos_tag(word_tokenize(text)) == expected_tagged
def test_pos_tag_eng_universal(self):
text = "John's big idea isn't all that bad."
expected_tagged = [
("John", "NOUN"),
("'s", "PRT"),
("big", "ADJ"),
("idea", "NOUN"),
("is", "VERB"),
("n't", "ADV"),
("all", "DET"),
("that", "DET"),
("bad", "ADJ"),
(".", "."),
]
assert pos_tag(word_tokenize(text), tagset="universal") == expected_tagged
@unittest.mock.patch("sys.stdout", new_callable=io.StringIO)
def check_stdout(self, tagset, query_regex, expected_output, mock_stdout):
tagset(query_regex)
self.assertEqual(mock_stdout.getvalue(), expected_output)
def test_tagsets_upenn(self):
self.check_stdout(upenn_tagset, r".*\$", UPENN_TAGSET_DOLLAR_TEST)
def test_tagsets_brown(self):
self.check_stdout(brown_tagset, r"NNS", BROWN_TAGSET_NNS_TEST)
def test_tagsets_claw5(self):
self.check_stdout(claws5_tagset, r"VHD", CLAW5_TAGSET_VHD_TEST)
def test_pos_tag_rus(self):
text = "Илья оторопел и дважды перечитал бумажку."
expected_tagged = [
("Илья", "S"),
("оторопел", "V"),
("и", "CONJ"),
("дважды", "ADV"),
("перечитал", "V"),
("бумажку", "S"),
(".", "NONLEX"),
]
assert pos_tag(word_tokenize(text), lang="rus") == expected_tagged
def test_pos_tag_rus_universal(self):
text = "Илья оторопел и дважды перечитал бумажку."
expected_tagged = [
("Илья", "NOUN"),
("оторопел", "VERB"),
("и", "CONJ"),
("дважды", "ADV"),
("перечитал", "VERB"),
("бумажку", "NOUN"),
(".", "."),
]
assert (
pos_tag(word_tokenize(text), tagset="universal", lang="rus")
== expected_tagged
)
def test_pos_tag_unknown_lang(self):
text = "모르겠 습니 다"
self.assertRaises(NotImplementedError, pos_tag, word_tokenize(text), lang="kor")
# Test for default kwarg, `lang=None`
self.assertRaises(NotImplementedError, pos_tag, word_tokenize(text), lang=None)
def test_unspecified_lang(self):
# Tries to force the lang='eng' option.
text = "모르겠 습니 다"
        expected_but_wrong = [("모르겠", "JJ"), ("습니", "NNP"), ("다", "NN")]
assert pos_tag(word_tokenize(text)) == expected_but_wrong

View File

@@ -0,0 +1,246 @@
from nltk.translate.ribes_score import corpus_ribes, word_rank_alignment
def test_ribes_empty_worder(): # worder as in word order
# Verifies that these two sentences have no alignment,
# and hence have the lowest possible RIBES score.
hyp = "This is a nice sentence which I quite like".split()
ref = "Okay well that's neat and all but the reference's different".split()
assert word_rank_alignment(ref, hyp) == []
list_of_refs = [[ref]]
hypotheses = [hyp]
assert corpus_ribes(list_of_refs, hypotheses) == 0.0
def test_ribes_one_worder():
# Verifies that these two sentences have just one match,
# and the RIBES score for this sentence with very little
# correspondence is 0.
hyp = "This is a nice sentence which I quite like".split()
ref = "Okay well that's nice and all but the reference's different".split()
assert word_rank_alignment(ref, hyp) == [3]
list_of_refs = [[ref]]
hypotheses = [hyp]
assert corpus_ribes(list_of_refs, hypotheses) == 0.0
def test_ribes_two_worder():
# Verifies that these two sentences have two matches,
# but still get the lowest possible RIBES score due
# to the lack of similarity.
hyp = "This is a nice sentence which I quite like".split()
ref = "Okay well that's nice and all but the reference is different".split()
assert word_rank_alignment(ref, hyp) == [9, 3]
list_of_refs = [[ref]]
hypotheses = [hyp]
assert corpus_ribes(list_of_refs, hypotheses) == 0.0
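# Roughly speaking, RIBES multiplies a rank-correlation statistic computed over
# the word-order alignment (`worder`) by a unigram-precision penalty, so
# hypotheses with fewer than two aligned words, as in the three cases above,
# contribute a sentence score of 0.0.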
def test_ribes():
# Based on the doctest of the corpus_ribes function
hyp1 = [
"It",
"is",
"a",
"guide",
"to",
"action",
"which",
"ensures",
"that",
"the",
"military",
"always",
"obeys",
"the",
"commands",
"of",
"the",
"party",
]
ref1a = [
"It",
"is",
"a",
"guide",
"to",
"action",
"that",
"ensures",
"that",
"the",
"military",
"will",
"forever",
"heed",
"Party",
"commands",
]
ref1b = [
"It",
"is",
"the",
"guiding",
"principle",
"which",
"guarantees",
"the",
"military",
"forces",
"always",
"being",
"under",
"the",
"command",
"of",
"the",
"Party",
]
ref1c = [
"It",
"is",
"the",
"practical",
"guide",
"for",
"the",
"army",
"always",
"to",
"heed",
"the",
"directions",
"of",
"the",
"party",
]
hyp2 = [
"he",
"read",
"the",
"book",
"because",
"he",
"was",
"interested",
"in",
"world",
"history",
]
ref2a = [
"he",
"was",
"interested",
"in",
"world",
"history",
"because",
"he",
"read",
"the",
"book",
]
list_of_refs = [[ref1a, ref1b, ref1c], [ref2a]]
hypotheses = [hyp1, hyp2]
score = corpus_ribes(list_of_refs, hypotheses)
assert round(score, 4) == 0.3597
def test_no_zero_div():
# Regression test for Issue 2529, assure that no ZeroDivisionError is thrown.
hyp1 = [
"It",
"is",
"a",
"guide",
"to",
"action",
"which",
"ensures",
"that",
"the",
"military",
"always",
"obeys",
"the",
"commands",
"of",
"the",
"party",
]
ref1a = [
"It",
"is",
"a",
"guide",
"to",
"action",
"that",
"ensures",
"that",
"the",
"military",
"will",
"forever",
"heed",
"Party",
"commands",
]
ref1b = [
"It",
"is",
"the",
"guiding",
"principle",
"which",
"guarantees",
"the",
"military",
"forces",
"always",
"being",
"under",
"the",
"command",
"of",
"the",
"Party",
]
ref1c = [
"It",
"is",
"the",
"practical",
"guide",
"for",
"the",
"army",
"always",
"to",
"heed",
"the",
"directions",
"of",
"the",
"party",
]
hyp2 = ["he", "read", "the"]
ref2a = ["he", "was", "interested", "in", "world", "history", "because", "he"]
list_of_refs = [[ref1a, ref1b, ref1c], [ref2a]]
hypotheses = [hyp1, hyp2]
score = corpus_ribes(list_of_refs, hypotheses)
assert round(score, 4) == 0.1688

View File

@@ -0,0 +1,94 @@
import pytest
from nltk import config_megam
from nltk.classify.rte_classify import RTEFeatureExtractor, rte_classifier, rte_features
from nltk.corpus import rte as rte_corpus
expected_from_rte_feature_extration = """
alwayson        => True
ne_hyp_extra    => 0
ne_overlap      => 1
neg_hyp         => 0
neg_txt         => 0
word_hyp_extra  => 3
word_overlap    => 3

alwayson        => True
ne_hyp_extra    => 0
ne_overlap      => 1
neg_hyp         => 0
neg_txt         => 0
word_hyp_extra  => 2
word_overlap    => 1

alwayson        => True
ne_hyp_extra    => 1
ne_overlap      => 1
neg_hyp         => 0
neg_txt         => 0
word_hyp_extra  => 1
word_overlap    => 2

alwayson        => True
ne_hyp_extra    => 1
ne_overlap      => 0
neg_hyp         => 0
neg_txt         => 0
word_hyp_extra  => 6
word_overlap    => 2

alwayson        => True
ne_hyp_extra    => 1
ne_overlap      => 0
neg_hyp         => 0
neg_txt         => 0
word_hyp_extra  => 4
word_overlap    => 0

alwayson        => True
ne_hyp_extra    => 1
ne_overlap      => 0
neg_hyp         => 0
neg_txt         => 0
word_hyp_extra  => 3
word_overlap    => 1
"""
class TestRTEClassifier:
# Test the feature extraction method.
def test_rte_feature_extraction(self):
pairs = rte_corpus.pairs(["rte1_dev.xml"])[:6]
test_output = [
f"{key:<15} => {rte_features(pair)[key]}"
for pair in pairs
for key in sorted(rte_features(pair))
]
expected_output = expected_from_rte_feature_extration.strip().split("\n")
# Remove null strings.
expected_output = list(filter(None, expected_output))
assert test_output == expected_output
# Test the RTEFeatureExtractor object.
def test_feature_extractor_object(self):
rtepair = rte_corpus.pairs(["rte3_dev.xml"])[33]
extractor = RTEFeatureExtractor(rtepair)
assert extractor.hyp_words == {"member", "China", "SCO."}
assert extractor.overlap("word") == set()
assert extractor.overlap("ne") == {"China"}
assert extractor.hyp_extra("word") == {"member"}
# Test the RTE classifier training.
def test_rte_classification_without_megam(self):
# Use a sample size for unit testing, since we
# don't need to fully train these classifiers
clf = rte_classifier("IIS", sample_N=100)
clf = rte_classifier("GIS", sample_N=100)
def test_rte_classification_with_megam(self):
try:
config_megam()
except (LookupError, AttributeError) as e:
pytest.skip("Skipping tests with dependencies on MEGAM")
clf = rte_classifier("megam", sample_N=100)

View File

@@ -0,0 +1,86 @@
import os
from io import BytesIO
import pytest
from nltk.corpus.reader import SeekableUnicodeStreamReader
def check_reader(unicode_string, encoding):
bytestr = unicode_string.encode(encoding)
stream = BytesIO(bytestr)
reader = SeekableUnicodeStreamReader(stream, encoding)
# Should open at the start of the file
assert reader.tell() == 0
# Compare original string to contents from `.readlines()`
assert unicode_string == "".join(reader.readlines())
# Should be at the end of the file now
stream.seek(0, os.SEEK_END)
assert reader.tell() == stream.tell()
reader.seek(0) # go back to start
# Compare original string to contents from `.read()`
contents = ""
char = None
while char != "":
char = reader.read(1)
contents += char
assert unicode_string == contents
# Call `check_reader` with a variety of input strings and encodings.
ENCODINGS = ["ascii", "latin1", "greek", "hebrew", "utf-16", "utf-8"]
STRINGS = [
"""
This is a test file.
It is fairly short.
""",
"This file can be encoded with latin1. \x83",
"""\
This is a test file.
Here's a blank line:

And here's some unicode: \xee \u0123 \uffe3
""",
"""\
This is a test file.
Unicode characters: \xf3 \u2222 \u3333\u4444 \u5555
""",
"""\
This is a larger file. It has some lines that are longer \
than 72 characters. It's got lots of repetition. Here's \
some unicode chars: \xee \u0123 \uffe3 \ueeee \u2345
How fun! Let's repeat it twenty times.
"""
* 20,
]
@pytest.mark.parametrize("string", STRINGS)
def test_reader(string):
for encoding in ENCODINGS:
# skip strings that can't be encoded with the current encoding
try:
string.encode(encoding)
except UnicodeEncodeError:
continue
check_reader(string, encoding)
def test_reader_stream_closes_when_deleted():
reader = SeekableUnicodeStreamReader(BytesIO(b""), "ascii")
assert not reader.stream.closed
reader.__del__()
assert reader.stream.closed
def teardown_module(module=None):
import gc
gc.collect()

View File

@@ -0,0 +1,112 @@
"""
Unit tests for Senna
"""
import unittest
from os import environ, path, sep
from nltk.classify import Senna
from nltk.tag import SennaChunkTagger, SennaNERTagger, SennaTagger
# Use the Senna executable path from the SENNA environment variable if it is set;
# otherwise fall back to the default install location below.
if "SENNA" in environ:
SENNA_EXECUTABLE_PATH = path.normpath(environ["SENNA"]) + sep
else:
SENNA_EXECUTABLE_PATH = "/usr/share/senna-v3.0"
senna_is_installed = path.exists(SENNA_EXECUTABLE_PATH)
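# To run these tests against a local Senna install, point the SENNA environment
# variable at the directory containing the Senna executable (e.g. SENNA=/opt/senna,
# an illustrative path); otherwise the tests are skipped unless Senna is found
# at the default location above.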
@unittest.skipUnless(senna_is_installed, "Requires Senna executable")
class TestSennaPipeline(unittest.TestCase):
"""Unittest for nltk.classify.senna"""
def test_senna_pipeline(self):
"""Senna pipeline interface"""
pipeline = Senna(SENNA_EXECUTABLE_PATH, ["pos", "chk", "ner"])
sent = "Dusseldorf is an international business center".split()
result = [
(token["word"], token["chk"], token["ner"], token["pos"])
for token in pipeline.tag(sent)
]
expected = [
("Dusseldorf", "B-NP", "B-LOC", "NNP"),
("is", "B-VP", "O", "VBZ"),
("an", "B-NP", "O", "DT"),
("international", "I-NP", "O", "JJ"),
("business", "I-NP", "O", "NN"),
("center", "I-NP", "O", "NN"),
]
self.assertEqual(result, expected)
@unittest.skipUnless(senna_is_installed, "Requires Senna executable")
class TestSennaTagger(unittest.TestCase):
"""Unittest for nltk.tag.senna"""
def test_senna_tagger(self):
tagger = SennaTagger(SENNA_EXECUTABLE_PATH)
result = tagger.tag("What is the airspeed of an unladen swallow ?".split())
expected = [
("What", "WP"),
("is", "VBZ"),
("the", "DT"),
("airspeed", "NN"),
("of", "IN"),
("an", "DT"),
("unladen", "NN"),
("swallow", "NN"),
("?", "."),
]
self.assertEqual(result, expected)
def test_senna_chunk_tagger(self):
chktagger = SennaChunkTagger(SENNA_EXECUTABLE_PATH)
result_1 = chktagger.tag("What is the airspeed of an unladen swallow ?".split())
expected_1 = [
("What", "B-NP"),
("is", "B-VP"),
("the", "B-NP"),
("airspeed", "I-NP"),
("of", "B-PP"),
("an", "B-NP"),
("unladen", "I-NP"),
("swallow", "I-NP"),
("?", "O"),
]
result_2 = list(chktagger.bio_to_chunks(result_1, chunk_type="NP"))
expected_2 = [
("What", "0"),
("the airspeed", "2-3"),
("an unladen swallow", "5-6-7"),
]
self.assertEqual(result_1, expected_1)
self.assertEqual(result_2, expected_2)
def test_senna_ner_tagger(self):
nertagger = SennaNERTagger(SENNA_EXECUTABLE_PATH)
result_1 = nertagger.tag("Shakespeare theatre was in London .".split())
expected_1 = [
("Shakespeare", "B-PER"),
("theatre", "O"),
("was", "O"),
("in", "O"),
("London", "B-LOC"),
(".", "O"),
]
result_2 = nertagger.tag("UN headquarters are in NY , USA .".split())
expected_2 = [
("UN", "B-ORG"),
("headquarters", "O"),
("are", "O"),
("in", "O"),
("NY", "B-LOC"),
(",", "O"),
("USA", "B-LOC"),
(".", "O"),
]
self.assertEqual(result_1, expected_1)
self.assertEqual(result_2, expected_2)

View File

@@ -0,0 +1,157 @@
import unittest
from contextlib import closing
from nltk import data
from nltk.stem.porter import PorterStemmer
from nltk.stem.snowball import SnowballStemmer
class SnowballTest(unittest.TestCase):
def test_arabic(self):
"""
        Unit test for the Snowball Arabic light stemmer, which handles
        prefixes and suffixes.
"""
        # Test with ignore_stopwords=True.
ar_stemmer = SnowballStemmer("arabic", True)
assert ar_stemmer.stem("الْعَرَبِــــــيَّة") == "عرب"
assert ar_stemmer.stem("العربية") == "عرب"
assert ar_stemmer.stem("فقالوا") == "قال"
assert ar_stemmer.stem("الطالبات") == "طالب"
assert ar_stemmer.stem("فالطالبات") == "طالب"
assert ar_stemmer.stem("والطالبات") == "طالب"
assert ar_stemmer.stem("الطالبون") == "طالب"
assert ar_stemmer.stem("اللذان") == "اللذان"
assert ar_stemmer.stem("من") == "من"
        # Test with ignore_stopwords=False.
ar_stemmer = SnowballStemmer("arabic", False)
assert ar_stemmer.stem("اللذان") == "اللذ" # this is a stop word
assert ar_stemmer.stem("الطالبات") == "طالب"
assert ar_stemmer.stem("الكلمات") == "كلم"
        # Test creating the Arabic stemmer without passing a value for ignore_stopwords.
ar_stemmer = SnowballStemmer("arabic")
assert ar_stemmer.stem("الْعَرَبِــــــيَّة") == "عرب"
assert ar_stemmer.stem("العربية") == "عرب"
assert ar_stemmer.stem("فقالوا") == "قال"
assert ar_stemmer.stem("الطالبات") == "طالب"
assert ar_stemmer.stem("الكلمات") == "كلم"
def test_russian(self):
stemmer_russian = SnowballStemmer("russian")
assert stemmer_russian.stem("авантненькая") == "авантненьк"
def test_german(self):
stemmer_german = SnowballStemmer("german")
stemmer_german2 = SnowballStemmer("german", ignore_stopwords=True)
assert stemmer_german.stem("Schr\xe4nke") == "schrank"
assert stemmer_german2.stem("Schr\xe4nke") == "schrank"
assert stemmer_german.stem("keinen") == "kein"
assert stemmer_german2.stem("keinen") == "keinen"
def test_spanish(self):
stemmer = SnowballStemmer("spanish")
assert stemmer.stem("Visionado") == "vision"
# The word 'algue' was raising an IndexError
assert stemmer.stem("algue") == "algu"
def test_short_strings_bug(self):
stemmer = SnowballStemmer("english")
assert stemmer.stem("y's") == "y"
class PorterTest(unittest.TestCase):
def _vocabulary(self):
with closing(
data.find("stemmers/porter_test/porter_vocabulary.txt").open(
encoding="utf-8"
)
) as fp:
return fp.read().splitlines()
def _test_against_expected_output(self, stemmer_mode, expected_stems):
stemmer = PorterStemmer(mode=stemmer_mode)
for word, true_stem in zip(self._vocabulary(), expected_stems):
our_stem = stemmer.stem(word)
assert (
our_stem == true_stem
), "{} should stem to {} in {} mode but got {}".format(
word,
true_stem,
stemmer_mode,
our_stem,
)
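    # The expected stem lists used below are data files located via
    # nltk.data.find() under stemmers/porter_test, one output file per stemmer
    # mode; each is compared word by word against the shared test vocabulary.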
def test_vocabulary_martin_mode(self):
"""Tests all words from the test vocabulary provided by M Porter
The sample vocabulary and output were sourced from
https://tartarus.org/martin/PorterStemmer/voc.txt and
https://tartarus.org/martin/PorterStemmer/output.txt
and are linked to from the Porter Stemmer algorithm's homepage
at https://tartarus.org/martin/PorterStemmer/
"""
with closing(
data.find("stemmers/porter_test/porter_martin_output.txt").open(
encoding="utf-8"
)
) as fp:
self._test_against_expected_output(
PorterStemmer.MARTIN_EXTENSIONS, fp.read().splitlines()
)
def test_vocabulary_nltk_mode(self):
with closing(
data.find("stemmers/porter_test/porter_nltk_output.txt").open(
encoding="utf-8"
)
) as fp:
self._test_against_expected_output(
PorterStemmer.NLTK_EXTENSIONS, fp.read().splitlines()
)
def test_vocabulary_original_mode(self):
# The list of stems for this test was generated by taking the
# Martin-blessed stemmer from
# https://tartarus.org/martin/PorterStemmer/c.txt
# and removing all the --DEPARTURE-- sections from it and
# running it against Martin's test vocabulary.
with closing(
data.find("stemmers/porter_test/porter_original_output.txt").open(
encoding="utf-8"
)
) as fp:
self._test_against_expected_output(
PorterStemmer.ORIGINAL_ALGORITHM, fp.read().splitlines()
)
self._test_against_expected_output(
PorterStemmer.ORIGINAL_ALGORITHM,
data.find("stemmers/porter_test/porter_original_output.txt")
.open(encoding="utf-8")
.read()
.splitlines(),
)
def test_oed_bug(self):
"""Test for bug https://github.com/nltk/nltk/issues/1581
Ensures that 'oed' can be stemmed without throwing an error.
"""
assert PorterStemmer().stem("oed") == "o"
def test_lowercase_option(self):
"""Test for improvement on https://github.com/nltk/nltk/issues/2507
Ensures that stems are lowercased when `to_lowercase=True`
"""
porter = PorterStemmer()
assert porter.stem("On") == "on"
assert porter.stem("I") == "i"
assert porter.stem("I", to_lowercase=False) == "I"
assert porter.stem("Github") == "github"
assert porter.stem("Github", to_lowercase=False) == "Github"

View File

@@ -0,0 +1,23 @@
def test_basic():
from nltk.tag import pos_tag
from nltk.tokenize import word_tokenize
result = pos_tag(word_tokenize("John's big idea isn't all that bad."))
assert result == [
("John", "NNP"),
("'s", "POS"),
("big", "JJ"),
("idea", "NN"),
("is", "VBZ"),
("n't", "RB"),
("all", "PDT"),
("that", "DT"),
("bad", "JJ"),
(".", "."),
]
def setup_module(module):
import pytest
pytest.importorskip("numpy")

View File

@@ -0,0 +1,779 @@
#!/usr/bin/env python
#
# Natural Language Toolkit: TGrep search
#
# Copyright (C) 2001-2025 NLTK Project
# Author: Will Roberts <wildwilhelm@gmail.com>
# URL: <https://www.nltk.org/>
# For license information, see LICENSE.TXT
"""
Unit tests for nltk.tgrep.
"""
import unittest
from nltk import tgrep
from nltk.tree import ParentedTree
class TestSequenceFunctions(unittest.TestCase):
"""
Class containing unit tests for nltk.tgrep.
"""
def test_tokenize_simple(self):
"""
Simple test of tokenization.
"""
tokens = tgrep.tgrep_tokenize("A .. (B !< C . D) | ![<< (E , F) $ G]")
self.assertEqual(
tokens,
[
"A",
"..",
"(",
"B",
"!",
"<",
"C",
".",
"D",
")",
"|",
"!",
"[",
"<<",
"(",
"E",
",",
"F",
")",
"$",
"G",
"]",
],
)
def test_tokenize_encoding(self):
"""
Test that tokenization handles bytes and strs the same way.
"""
self.assertEqual(
tgrep.tgrep_tokenize(b"A .. (B !< C . D) | ![<< (E , F) $ G]"),
tgrep.tgrep_tokenize("A .. (B !< C . D) | ![<< (E , F) $ G]"),
)
def test_tokenize_link_types(self):
"""
Test tokenization of basic link types.
"""
self.assertEqual(tgrep.tgrep_tokenize("A<B"), ["A", "<", "B"])
self.assertEqual(tgrep.tgrep_tokenize("A>B"), ["A", ">", "B"])
self.assertEqual(tgrep.tgrep_tokenize("A<3B"), ["A", "<3", "B"])
self.assertEqual(tgrep.tgrep_tokenize("A>3B"), ["A", ">3", "B"])
self.assertEqual(tgrep.tgrep_tokenize("A<,B"), ["A", "<,", "B"])
self.assertEqual(tgrep.tgrep_tokenize("A>,B"), ["A", ">,", "B"])
self.assertEqual(tgrep.tgrep_tokenize("A<-3B"), ["A", "<-3", "B"])
self.assertEqual(tgrep.tgrep_tokenize("A>-3B"), ["A", ">-3", "B"])
self.assertEqual(tgrep.tgrep_tokenize("A<-B"), ["A", "<-", "B"])
self.assertEqual(tgrep.tgrep_tokenize("A>-B"), ["A", ">-", "B"])
self.assertEqual(tgrep.tgrep_tokenize("A<'B"), ["A", "<'", "B"])
self.assertEqual(tgrep.tgrep_tokenize("A>'B"), ["A", ">'", "B"])
self.assertEqual(tgrep.tgrep_tokenize("A<:B"), ["A", "<:", "B"])
self.assertEqual(tgrep.tgrep_tokenize("A>:B"), ["A", ">:", "B"])
self.assertEqual(tgrep.tgrep_tokenize("A<<B"), ["A", "<<", "B"])
self.assertEqual(tgrep.tgrep_tokenize("A>>B"), ["A", ">>", "B"])
self.assertEqual(tgrep.tgrep_tokenize("A<<,B"), ["A", "<<,", "B"])
self.assertEqual(tgrep.tgrep_tokenize("A>>,B"), ["A", ">>,", "B"])
self.assertEqual(tgrep.tgrep_tokenize("A<<'B"), ["A", "<<'", "B"])
self.assertEqual(tgrep.tgrep_tokenize("A>>'B"), ["A", ">>'", "B"])
self.assertEqual(tgrep.tgrep_tokenize("A<<:B"), ["A", "<<:", "B"])
self.assertEqual(tgrep.tgrep_tokenize("A>>:B"), ["A", ">>:", "B"])
self.assertEqual(tgrep.tgrep_tokenize("A.B"), ["A", ".", "B"])
self.assertEqual(tgrep.tgrep_tokenize("A,B"), ["A", ",", "B"])
self.assertEqual(tgrep.tgrep_tokenize("A..B"), ["A", "..", "B"])
self.assertEqual(tgrep.tgrep_tokenize("A,,B"), ["A", ",,", "B"])
self.assertEqual(tgrep.tgrep_tokenize("A$B"), ["A", "$", "B"])
self.assertEqual(tgrep.tgrep_tokenize("A$.B"), ["A", "$.", "B"])
self.assertEqual(tgrep.tgrep_tokenize("A$,B"), ["A", "$,", "B"])
self.assertEqual(tgrep.tgrep_tokenize("A$..B"), ["A", "$..", "B"])
self.assertEqual(tgrep.tgrep_tokenize("A$,,B"), ["A", "$,,", "B"])
self.assertEqual(tgrep.tgrep_tokenize("A!<B"), ["A", "!", "<", "B"])
self.assertEqual(tgrep.tgrep_tokenize("A!>B"), ["A", "!", ">", "B"])
self.assertEqual(tgrep.tgrep_tokenize("A!<3B"), ["A", "!", "<3", "B"])
self.assertEqual(tgrep.tgrep_tokenize("A!>3B"), ["A", "!", ">3", "B"])
self.assertEqual(tgrep.tgrep_tokenize("A!<,B"), ["A", "!", "<,", "B"])
self.assertEqual(tgrep.tgrep_tokenize("A!>,B"), ["A", "!", ">,", "B"])
self.assertEqual(tgrep.tgrep_tokenize("A!<-3B"), ["A", "!", "<-3", "B"])
self.assertEqual(tgrep.tgrep_tokenize("A!>-3B"), ["A", "!", ">-3", "B"])
self.assertEqual(tgrep.tgrep_tokenize("A!<-B"), ["A", "!", "<-", "B"])
self.assertEqual(tgrep.tgrep_tokenize("A!>-B"), ["A", "!", ">-", "B"])
self.assertEqual(tgrep.tgrep_tokenize("A!<'B"), ["A", "!", "<'", "B"])
self.assertEqual(tgrep.tgrep_tokenize("A!>'B"), ["A", "!", ">'", "B"])
self.assertEqual(tgrep.tgrep_tokenize("A!<:B"), ["A", "!", "<:", "B"])
self.assertEqual(tgrep.tgrep_tokenize("A!>:B"), ["A", "!", ">:", "B"])
self.assertEqual(tgrep.tgrep_tokenize("A!<<B"), ["A", "!", "<<", "B"])
self.assertEqual(tgrep.tgrep_tokenize("A!>>B"), ["A", "!", ">>", "B"])
self.assertEqual(tgrep.tgrep_tokenize("A!<<,B"), ["A", "!", "<<,", "B"])
self.assertEqual(tgrep.tgrep_tokenize("A!>>,B"), ["A", "!", ">>,", "B"])
self.assertEqual(tgrep.tgrep_tokenize("A!<<'B"), ["A", "!", "<<'", "B"])
self.assertEqual(tgrep.tgrep_tokenize("A!>>'B"), ["A", "!", ">>'", "B"])
self.assertEqual(tgrep.tgrep_tokenize("A!<<:B"), ["A", "!", "<<:", "B"])
self.assertEqual(tgrep.tgrep_tokenize("A!>>:B"), ["A", "!", ">>:", "B"])
self.assertEqual(tgrep.tgrep_tokenize("A!.B"), ["A", "!", ".", "B"])
self.assertEqual(tgrep.tgrep_tokenize("A!,B"), ["A", "!", ",", "B"])
self.assertEqual(tgrep.tgrep_tokenize("A!..B"), ["A", "!", "..", "B"])
self.assertEqual(tgrep.tgrep_tokenize("A!,,B"), ["A", "!", ",,", "B"])
self.assertEqual(tgrep.tgrep_tokenize("A!$B"), ["A", "!", "$", "B"])
self.assertEqual(tgrep.tgrep_tokenize("A!$.B"), ["A", "!", "$.", "B"])
self.assertEqual(tgrep.tgrep_tokenize("A!$,B"), ["A", "!", "$,", "B"])
self.assertEqual(tgrep.tgrep_tokenize("A!$..B"), ["A", "!", "$..", "B"])
self.assertEqual(tgrep.tgrep_tokenize("A!$,,B"), ["A", "!", "$,,", "B"])
def test_tokenize_examples(self):
"""
Test tokenization of the TGrep2 manual example patterns.
"""
self.assertEqual(tgrep.tgrep_tokenize("NP < PP"), ["NP", "<", "PP"])
self.assertEqual(tgrep.tgrep_tokenize("/^NP/"), ["/^NP/"])
self.assertEqual(
tgrep.tgrep_tokenize("NP << PP . VP"), ["NP", "<<", "PP", ".", "VP"]
)
self.assertEqual(
tgrep.tgrep_tokenize("NP << PP | . VP"), ["NP", "<<", "PP", "|", ".", "VP"]
)
self.assertEqual(
tgrep.tgrep_tokenize("NP !<< PP [> NP | >> VP]"),
["NP", "!", "<<", "PP", "[", ">", "NP", "|", ">>", "VP", "]"],
)
self.assertEqual(
tgrep.tgrep_tokenize("NP << (PP . VP)"),
["NP", "<<", "(", "PP", ".", "VP", ")"],
)
self.assertEqual(
tgrep.tgrep_tokenize("NP <' (PP <, (IN < on))"),
["NP", "<'", "(", "PP", "<,", "(", "IN", "<", "on", ")", ")"],
)
self.assertEqual(
tgrep.tgrep_tokenize("S < (A < B) < C"),
["S", "<", "(", "A", "<", "B", ")", "<", "C"],
)
self.assertEqual(
tgrep.tgrep_tokenize("S < ((A < B) < C)"),
["S", "<", "(", "(", "A", "<", "B", ")", "<", "C", ")"],
)
self.assertEqual(
tgrep.tgrep_tokenize("S < (A < B < C)"),
["S", "<", "(", "A", "<", "B", "<", "C", ")"],
)
self.assertEqual(tgrep.tgrep_tokenize("A<B&.C"), ["A", "<", "B", "&", ".", "C"])
def test_tokenize_quoting(self):
"""
Test tokenization of quoting.
"""
self.assertEqual(
tgrep.tgrep_tokenize('"A<<:B"<<:"A $.. B"<"A>3B"<C'),
['"A<<:B"', "<<:", '"A $.. B"', "<", '"A>3B"', "<", "C"],
)
def test_tokenize_nodenames(self):
"""
Test tokenization of node names.
"""
self.assertEqual(tgrep.tgrep_tokenize("Robert"), ["Robert"])
self.assertEqual(tgrep.tgrep_tokenize("/^[Bb]ob/"), ["/^[Bb]ob/"])
self.assertEqual(tgrep.tgrep_tokenize("*"), ["*"])
self.assertEqual(tgrep.tgrep_tokenize("__"), ["__"])
# test tokenization of NLTK tree position syntax
self.assertEqual(tgrep.tgrep_tokenize("N()"), ["N(", ")"])
self.assertEqual(tgrep.tgrep_tokenize("N(0,)"), ["N(", "0", ",", ")"])
self.assertEqual(tgrep.tgrep_tokenize("N(0,0)"), ["N(", "0", ",", "0", ")"])
self.assertEqual(
tgrep.tgrep_tokenize("N(0,0,)"), ["N(", "0", ",", "0", ",", ")"]
)
def test_tokenize_macros(self):
"""
Test tokenization of macro definitions.
"""
self.assertEqual(
tgrep.tgrep_tokenize(
"@ NP /^NP/;\n@ NN /^NN/;\n@NP [!< NP | < @NN] !$.. @NN"
),
[
"@",
"NP",
"/^NP/",
";",
"@",
"NN",
"/^NN/",
";",
"@NP",
"[",
"!",
"<",
"NP",
"|",
"<",
"@NN",
"]",
"!",
"$..",
"@NN",
],
)
def test_node_simple(self):
"""
Test a simple use of tgrep for finding nodes matching a given
pattern.
"""
tree = ParentedTree.fromstring(
"(S (NP (DT the) (JJ big) (NN dog)) " "(VP bit) (NP (DT a) (NN cat)))"
)
self.assertEqual(list(tgrep.tgrep_positions("NN", [tree])), [[(0, 2), (2, 1)]])
self.assertEqual(
list(tgrep.tgrep_nodes("NN", [tree])), [[tree[0, 2], tree[2, 1]]]
)
self.assertEqual(
list(tgrep.tgrep_positions("NN|JJ", [tree])), [[(0, 1), (0, 2), (2, 1)]]
)
def test_node_printing(self):
"""Test that the tgrep print operator ' is properly ignored."""
tree = ParentedTree.fromstring("(S (n x) (N x))")
self.assertEqual(
list(tgrep.tgrep_positions("N", [tree])),
list(tgrep.tgrep_positions("'N", [tree])),
)
self.assertEqual(
list(tgrep.tgrep_positions("/[Nn]/", [tree])),
list(tgrep.tgrep_positions("'/[Nn]/", [tree])),
)
def test_node_encoding(self):
"""
        Test that tgrep handles byte and str search strings the same
        way.
"""
tree = ParentedTree.fromstring(
"(S (NP (DT the) (JJ big) (NN dog)) " "(VP bit) (NP (DT a) (NN cat)))"
)
self.assertEqual(
list(tgrep.tgrep_positions(b"NN", [tree])),
list(tgrep.tgrep_positions(b"NN", [tree])),
)
self.assertEqual(
list(tgrep.tgrep_nodes(b"NN", [tree])),
list(tgrep.tgrep_nodes("NN", [tree])),
)
self.assertEqual(
list(tgrep.tgrep_positions(b"NN|JJ", [tree])),
list(tgrep.tgrep_positions("NN|JJ", [tree])),
)
def test_node_nocase(self):
"""
Test selecting nodes using case insensitive node names.
"""
tree = ParentedTree.fromstring("(S (n x) (N x))")
self.assertEqual(list(tgrep.tgrep_positions('"N"', [tree])), [[(1,)]])
self.assertEqual(list(tgrep.tgrep_positions('i@"N"', [tree])), [[(0,), (1,)]])
def test_node_quoted(self):
"""
Test selecting nodes using quoted node names.
"""
tree = ParentedTree.fromstring('(N ("N" x) (N" x) ("\\" x))')
self.assertEqual(list(tgrep.tgrep_positions('"N"', [tree])), [[()]])
self.assertEqual(list(tgrep.tgrep_positions('"\\"N\\""', [tree])), [[(0,)]])
self.assertEqual(list(tgrep.tgrep_positions('"N\\""', [tree])), [[(1,)]])
self.assertEqual(list(tgrep.tgrep_positions('"\\"\\\\\\""', [tree])), [[(2,)]])
def test_node_regex(self):
"""
Test regex matching on nodes.
"""
tree = ParentedTree.fromstring("(S (NP-SBJ x) (NP x) (NNP x) (VP x))")
# This is a regular expression that matches any node whose
# name starts with NP, including NP-SBJ:
self.assertEqual(list(tgrep.tgrep_positions("/^NP/", [tree])), [[(0,), (1,)]])
def test_node_regex_2(self):
"""
Test regex matching on nodes.
"""
tree = ParentedTree.fromstring("(S (SBJ x) (SBJ1 x) (NP-SBJ x))")
self.assertEqual(list(tgrep.tgrep_positions("/^SBJ/", [tree])), [[(0,), (1,)]])
# This is a regular expression that matches any node whose
# name includes SBJ, including NP-SBJ:
self.assertEqual(
list(tgrep.tgrep_positions("/SBJ/", [tree])), [[(0,), (1,), (2,)]]
)
def test_node_tree_position(self):
"""
Test matching on nodes based on NLTK tree position.
"""
tree = ParentedTree.fromstring("(S (NP-SBJ x) (NP x) (NNP x) (VP x))")
# test all tree positions that are not leaves
leaf_positions = {tree.leaf_treeposition(x) for x in range(len(tree.leaves()))}
tree_positions = [x for x in tree.treepositions() if x not in leaf_positions]
for position in tree_positions:
node_id = f"N{position}"
tgrep_positions = list(tgrep.tgrep_positions(node_id, [tree]))
self.assertEqual(len(tgrep_positions[0]), 1)
self.assertEqual(tgrep_positions[0][0], position)
def test_node_noleaves(self):
"""
Test node name matching with the search_leaves flag set to False.
"""
tree = ParentedTree.fromstring("(S (A (T x)) (B (N x)))")
self.assertEqual(
list(tgrep.tgrep_positions("x", [tree])), [[(0, 0, 0), (1, 0, 0)]]
)
self.assertEqual(list(tgrep.tgrep_positions("x", [tree], False)), [[]])
def tests_rel_dominance(self):
"""
Test matching nodes based on dominance relations.
"""
tree = ParentedTree.fromstring("(S (A (T x)) (B (N x)))")
self.assertEqual(list(tgrep.tgrep_positions("* < T", [tree])), [[(0,)]])
self.assertEqual(list(tgrep.tgrep_positions("* < T > S", [tree])), [[(0,)]])
self.assertEqual(
list(tgrep.tgrep_positions("* !< T", [tree])),
[[(), (0, 0), (0, 0, 0), (1,), (1, 0), (1, 0, 0)]],
)
self.assertEqual(list(tgrep.tgrep_positions("* !< T > S", [tree])), [[(1,)]])
self.assertEqual(list(tgrep.tgrep_positions("* > A", [tree])), [[(0, 0)]])
self.assertEqual(list(tgrep.tgrep_positions("* > B", [tree])), [[(1, 0)]])
self.assertEqual(
list(tgrep.tgrep_positions("* !> B", [tree])),
[[(), (0,), (0, 0), (0, 0, 0), (1,), (1, 0, 0)]],
)
self.assertEqual(
list(tgrep.tgrep_positions("* !> B >> S", [tree])), [[(0,), (0, 0), (1,)]]
)
self.assertEqual(
list(tgrep.tgrep_positions("* >> S", [tree])),
[[(0,), (0, 0), (1,), (1, 0)]],
)
self.assertEqual(
list(tgrep.tgrep_positions("* >>, S", [tree])), [[(0,), (0, 0)]]
)
self.assertEqual(
list(tgrep.tgrep_positions("* >>' S", [tree])), [[(1,), (1, 0)]]
)
# Known issue:
# self.assertEqual(list(tgrep.tgrep_positions('* !>> S', [tree])),
# [[()]])
self.assertEqual(list(tgrep.tgrep_positions("* << T", [tree])), [[(), (0,)]])
self.assertEqual(list(tgrep.tgrep_positions("* <<' T", [tree])), [[(0,)]])
self.assertEqual(list(tgrep.tgrep_positions("* <<1 N", [tree])), [[(1,)]])
self.assertEqual(
list(tgrep.tgrep_positions("* !<< T", [tree])),
[[(0, 0), (0, 0, 0), (1,), (1, 0), (1, 0, 0)]],
)
tree = ParentedTree.fromstring("(S (A (T x)) (B (T x) (N x )))")
self.assertEqual(list(tgrep.tgrep_positions("* <: T", [tree])), [[(0,)]])
self.assertEqual(list(tgrep.tgrep_positions("* < T", [tree])), [[(0,), (1,)]])
self.assertEqual(
list(tgrep.tgrep_positions("* !<: T", [tree])),
[[(), (0, 0), (0, 0, 0), (1,), (1, 0), (1, 0, 0), (1, 1), (1, 1, 0)]],
)
self.assertEqual(list(tgrep.tgrep_positions("* !<: T > S", [tree])), [[(1,)]])
tree = ParentedTree.fromstring("(S (T (A x) (B x)) (T (C x)))")
self.assertEqual(list(tgrep.tgrep_positions("* >: T", [tree])), [[(1, 0)]])
self.assertEqual(
list(tgrep.tgrep_positions("* !>: T", [tree])),
[[(), (0,), (0, 0), (0, 0, 0), (0, 1), (0, 1, 0), (1,), (1, 0, 0)]],
)
tree = ParentedTree.fromstring(
"(S (A (B (C (D (E (T x))))))" " (A (B (C (D (E (T x))) (N x)))))"
)
self.assertEqual(
list(tgrep.tgrep_positions("* <<: T", [tree])),
[
[
(0,),
(0, 0),
(0, 0, 0),
(0, 0, 0, 0),
(0, 0, 0, 0, 0),
(1, 0, 0, 0),
(1, 0, 0, 0, 0),
]
],
)
self.assertEqual(
list(tgrep.tgrep_positions("* >>: A", [tree])),
[
[
(0, 0),
(0, 0, 0),
(0, 0, 0, 0),
(0, 0, 0, 0, 0),
(0, 0, 0, 0, 0, 0),
(1, 0),
(1, 0, 0),
]
],
)
def test_bad_operator(self):
"""
Test error handling of undefined tgrep operators.
"""
tree = ParentedTree.fromstring("(S (A (T x)) (B (N x)))")
self.assertRaises(
tgrep.TgrepException, list, tgrep.tgrep_positions("* >>> S", [tree])
)
def test_comments(self):
"""
Test that comments are correctly filtered out of tgrep search
strings.
"""
tree = ParentedTree.fromstring("(S (NN x) (NP x) (NN x))")
search1 = """
@ NP /^NP/;
@ NN /^NN/;
@NN
"""
self.assertEqual(list(tgrep.tgrep_positions(search1, [tree])), [[(0,), (2,)]])
search2 = """
# macros
@ NP /^NP/;
@ NN /^NN/;
# search string
@NN
"""
self.assertEqual(list(tgrep.tgrep_positions(search2, [tree])), [[(0,), (2,)]])
def test_rel_sister_nodes(self):
"""
Test matching sister nodes in a tree.
"""
tree = ParentedTree.fromstring("(S (A x) (B x) (C x))")
self.assertEqual(list(tgrep.tgrep_positions("* $. B", [tree])), [[(0,)]])
self.assertEqual(list(tgrep.tgrep_positions("* $.. B", [tree])), [[(0,)]])
self.assertEqual(list(tgrep.tgrep_positions("* $, B", [tree])), [[(2,)]])
self.assertEqual(list(tgrep.tgrep_positions("* $,, B", [tree])), [[(2,)]])
self.assertEqual(list(tgrep.tgrep_positions("* $ B", [tree])), [[(0,), (2,)]])
def tests_rel_indexed_children(self):
"""
Test matching nodes based on their index in their parent node.
"""
tree = ParentedTree.fromstring("(S (A x) (B x) (C x))")
self.assertEqual(list(tgrep.tgrep_positions("* >, S", [tree])), [[(0,)]])
self.assertEqual(list(tgrep.tgrep_positions("* >1 S", [tree])), [[(0,)]])
self.assertEqual(list(tgrep.tgrep_positions("* >2 S", [tree])), [[(1,)]])
self.assertEqual(list(tgrep.tgrep_positions("* >3 S", [tree])), [[(2,)]])
self.assertEqual(list(tgrep.tgrep_positions("* >' S", [tree])), [[(2,)]])
self.assertEqual(list(tgrep.tgrep_positions("* >-1 S", [tree])), [[(2,)]])
self.assertEqual(list(tgrep.tgrep_positions("* >-2 S", [tree])), [[(1,)]])
self.assertEqual(list(tgrep.tgrep_positions("* >-3 S", [tree])), [[(0,)]])
tree = ParentedTree.fromstring(
"(S (D (A x) (B x) (C x)) (E (B x) (C x) (A x)) " "(F (C x) (A x) (B x)))"
)
self.assertEqual(list(tgrep.tgrep_positions("* <, A", [tree])), [[(0,)]])
self.assertEqual(list(tgrep.tgrep_positions("* <1 A", [tree])), [[(0,)]])
self.assertEqual(list(tgrep.tgrep_positions("* <2 A", [tree])), [[(2,)]])
self.assertEqual(list(tgrep.tgrep_positions("* <3 A", [tree])), [[(1,)]])
self.assertEqual(list(tgrep.tgrep_positions("* <' A", [tree])), [[(1,)]])
self.assertEqual(list(tgrep.tgrep_positions("* <-1 A", [tree])), [[(1,)]])
self.assertEqual(list(tgrep.tgrep_positions("* <-2 A", [tree])), [[(2,)]])
self.assertEqual(list(tgrep.tgrep_positions("* <-3 A", [tree])), [[(0,)]])
def test_rel_precedence(self):
"""
Test matching nodes based on precedence relations.
"""
tree = ParentedTree.fromstring(
"(S (NP (NP (PP x)) (NP (AP x)))"
" (VP (AP (X (PP x)) (Y (AP x))))"
" (NP (RC (NP (AP x)))))"
)
self.assertEqual(
list(tgrep.tgrep_positions("* . X", [tree])), [[(0,), (0, 1), (0, 1, 0)]]
)
self.assertEqual(
list(tgrep.tgrep_positions("* . Y", [tree])), [[(1, 0, 0), (1, 0, 0, 0)]]
)
self.assertEqual(
list(tgrep.tgrep_positions("* .. X", [tree])),
[[(0,), (0, 0), (0, 0, 0), (0, 1), (0, 1, 0)]],
)
self.assertEqual(
list(tgrep.tgrep_positions("* .. Y", [tree])),
[[(0,), (0, 0), (0, 0, 0), (0, 1), (0, 1, 0), (1, 0, 0), (1, 0, 0, 0)]],
)
self.assertEqual(
list(tgrep.tgrep_positions("* , X", [tree])), [[(1, 0, 1), (1, 0, 1, 0)]]
)
self.assertEqual(
list(tgrep.tgrep_positions("* , Y", [tree])),
[[(2,), (2, 0), (2, 0, 0), (2, 0, 0, 0)]],
)
self.assertEqual(
list(tgrep.tgrep_positions("* ,, X", [tree])),
[[(1, 0, 1), (1, 0, 1, 0), (2,), (2, 0), (2, 0, 0), (2, 0, 0, 0)]],
)
self.assertEqual(
list(tgrep.tgrep_positions("* ,, Y", [tree])),
[[(2,), (2, 0), (2, 0, 0), (2, 0, 0, 0)]],
)
def test_examples(self):
"""
Test the Basic Examples from the TGrep2 manual.
"""
tree = ParentedTree.fromstring("(S (NP (AP x)) (NP (PP x)))")
# This matches any NP node that immediately dominates a PP:
self.assertEqual(list(tgrep.tgrep_positions("NP < PP", [tree])), [[(1,)]])
tree = ParentedTree.fromstring("(S (NP x) (VP x) (NP (PP x)) (VP x))")
# This matches an NP that dominates a PP and is immediately
# followed by a VP:
self.assertEqual(list(tgrep.tgrep_positions("NP << PP . VP", [tree])), [[(2,)]])
tree = ParentedTree.fromstring(
"(S (NP (AP x)) (NP (PP x)) " "(NP (DET x) (NN x)) (VP x))"
)
# This matches an NP that dominates a PP or is immediately
# followed by a VP:
self.assertEqual(
list(tgrep.tgrep_positions("NP << PP | . VP", [tree])), [[(1,), (2,)]]
)
tree = ParentedTree.fromstring(
"(S (NP (NP (PP x)) (NP (AP x)))"
" (VP (AP (NP (PP x)) (NP (AP x))))"
" (NP (RC (NP (AP x)))))"
)
# This matches an NP that does not dominate a PP. Also, the NP
# must either have a parent that is an NP or be dominated by a
# VP:
self.assertEqual(
list(tgrep.tgrep_positions("NP !<< PP [> NP | >> VP]", [tree])),
[[(0, 1), (1, 0, 1)]],
)
tree = ParentedTree.fromstring(
"(S (NP (AP (PP x) (VP x))) " "(NP (AP (PP x) (NP x))) (NP x))"
)
# This matches an NP that dominates a PP which itself is
# immediately followed by a VP. Note the use of parentheses to
# group ". VP" with the PP rather than with the NP:
self.assertEqual(
list(tgrep.tgrep_positions("NP << (PP . VP)", [tree])), [[(0,)]]
)
tree = ParentedTree.fromstring(
"(S (NP (DET a) (NN cat) (PP (IN on) (NP x)))"
" (NP (DET a) (NN cat) (PP (IN on) (NP x)) (PP x))"
" (NP x))"
)
# This matches an NP whose last child is a PP that begins with
# the preposition "on":
self.assertEqual(
list(tgrep.tgrep_positions("NP <' (PP <, (IN < on))", [tree])), [[(0,)]]
)
tree = ParentedTree.fromstring(
"(S (S (C x) (A (B x))) (S (C x) (A x)) " "(S (D x) (A (B x))))"
)
# The following pattern matches an S which has a child A and
# another child that is a C and that the A has a child B:
self.assertEqual(
list(tgrep.tgrep_positions("S < (A < B) < C", [tree])), [[(0,)]]
)
tree = ParentedTree.fromstring(
"(S (S (A (B x) (C x))) (S (S (C x) (A (B x)))))"
)
# However, this pattern means that S has child A and that A
# has children B and C:
self.assertEqual(
list(tgrep.tgrep_positions("S < ((A < B) < C)", [tree])), [[(0,)]]
)
# It is equivalent to this:
self.assertEqual(
list(tgrep.tgrep_positions("S < (A < B < C)", [tree])), [[(0,)]]
)
def test_use_macros(self):
"""
Test defining and using tgrep2 macros.
"""
tree = ParentedTree.fromstring(
"(VP (VB sold) (NP (DET the) "
"(NN heiress)) (NP (NN deed) (PREP to) "
"(NP (DET the) (NN school) (NN house))))"
)
self.assertEqual(
list(
tgrep.tgrep_positions(
"@ NP /^NP/;\n@ NN /^NN/;\n@NP !< @NP !$.. @NN", [tree]
)
),
[[(1,), (2, 2)]],
)
# use undefined macro @CNP
self.assertRaises(
tgrep.TgrepException,
list,
tgrep.tgrep_positions(
"@ NP /^NP/;\n@ NN /^NN/;\n@CNP !< @NP !$.. @NN", [tree]
),
)
def test_tokenize_node_labels(self):
"""Test tokenization of labeled nodes."""
self.assertEqual(
tgrep.tgrep_tokenize("S < @SBJ < (@VP < (@VB $.. @OBJ))"),
[
"S",
"<",
"@SBJ",
"<",
"(",
"@VP",
"<",
"(",
"@VB",
"$..",
"@OBJ",
")",
")",
],
)
self.assertEqual(
tgrep.tgrep_tokenize("S < @SBJ=s < (@VP=v < (@VB $.. @OBJ))"),
[
"S",
"<",
"@SBJ",
"=",
"s",
"<",
"(",
"@VP",
"=",
"v",
"<",
"(",
"@VB",
"$..",
"@OBJ",
")",
")",
],
)
def test_tokenize_segmented_patterns(self):
"""Test tokenization of segmented patterns."""
self.assertEqual(
tgrep.tgrep_tokenize("S < @SBJ=s < (@VP=v < (@VB $.. @OBJ)) : =s .. =v"),
[
"S",
"<",
"@SBJ",
"=",
"s",
"<",
"(",
"@VP",
"=",
"v",
"<",
"(",
"@VB",
"$..",
"@OBJ",
")",
")",
":",
"=s",
"..",
"=v",
],
)
def test_labeled_nodes(self):
"""
Test labeled nodes.
Test case from Emily M. Bender.
"""
search = """
# macros
@ SBJ /SBJ/;
@ VP /VP/;
@ VB /VB/;
@ VPoB /V[PB]/;
            @ OBJ /OBJ/;

            # 1 svo
S < @SBJ=s < (@VP=v < (@VB $.. @OBJ)) : =s .. =v"""
sent1 = ParentedTree.fromstring(
"(S (NP-SBJ I) (VP (VB eat) (NP-OBJ (NNS apples))))"
)
sent2 = ParentedTree.fromstring(
"(S (VP (VB eat) (NP-OBJ (NNS apples))) (NP-SBJ I))"
)
search_firsthalf = search.split("\n\n")[0] + "S < @SBJ < (@VP < (@VB $.. @OBJ))"
search_rewrite = "S < (/.*SBJ/ $.. (/VP/ < (/VB/ $.. /.*OBJ/)))"
self.assertTrue(list(tgrep.tgrep_positions(search_firsthalf, [sent1]))[0])
self.assertTrue(list(tgrep.tgrep_positions(search, [sent1]))[0])
self.assertTrue(list(tgrep.tgrep_positions(search_rewrite, [sent1]))[0])
self.assertEqual(
list(tgrep.tgrep_positions(search, [sent1])),
list(tgrep.tgrep_positions(search_rewrite, [sent1])),
)
self.assertTrue(list(tgrep.tgrep_positions(search_firsthalf, [sent2]))[0])
self.assertFalse(list(tgrep.tgrep_positions(search, [sent2]))[0])
self.assertFalse(list(tgrep.tgrep_positions(search_rewrite, [sent2]))[0])
self.assertEqual(
list(tgrep.tgrep_positions(search, [sent2])),
list(tgrep.tgrep_positions(search_rewrite, [sent2])),
)
def test_multiple_conjs(self):
"""
Test that multiple (3 or more) conjunctions of node relations are
handled properly.
"""
sent = ParentedTree.fromstring("((A (B b) (C c)) (A (B b) (C c) (D d)))")
# search = '(A < B < C < D)'
# search_tworels = '(A < B < C)'
self.assertEqual(
list(tgrep.tgrep_positions("(A < B < C < D)", [sent])), [[(1,)]]
)
self.assertEqual(
list(tgrep.tgrep_positions("(A < B < C)", [sent])), [[(0,), (1,)]]
)
def test_trailing_semicolon(self):
"""
Test that semicolons at the end of a tgrep2 search string won't
cause a parse failure.
"""
tree = ParentedTree.fromstring(
"(S (NP (DT the) (JJ big) (NN dog)) " "(VP bit) (NP (DT a) (NN cat)))"
)
self.assertEqual(list(tgrep.tgrep_positions("NN", [tree])), [[(0, 2), (2, 1)]])
self.assertEqual(list(tgrep.tgrep_positions("NN;", [tree])), [[(0, 2), (2, 1)]])
self.assertEqual(
list(tgrep.tgrep_positions("NN;;", [tree])), [[(0, 2), (2, 1)]]
)

View File

@@ -0,0 +1,905 @@
"""
Unit tests for nltk.tokenize.
See also nltk/test/tokenize.doctest
"""
from typing import List, Tuple
import pytest
from nltk.tokenize import (
LegalitySyllableTokenizer,
StanfordSegmenter,
SyllableTokenizer,
TreebankWordTokenizer,
TweetTokenizer,
punkt,
sent_tokenize,
word_tokenize,
)
from nltk.tokenize.simple import CharTokenizer
def load_stanford_segmenter():
try:
seg = StanfordSegmenter()
seg.default_config("ar")
seg.default_config("zh")
return True
except LookupError:
return False
check_stanford_segmenter = pytest.mark.skipif(
not load_stanford_segmenter(),
reason="NLTK was unable to find stanford-segmenter.jar.",
)
class TestTokenize:
def test_tweet_tokenizer(self):
"""
Test TweetTokenizer using words with special and accented characters.
"""
tokenizer = TweetTokenizer(strip_handles=True, reduce_len=True)
s9 = "@myke: Let's test these words: resumé España München français"
tokens = tokenizer.tokenize(s9)
expected = [
":",
"Let's",
"test",
"these",
"words",
":",
"resumé",
"España",
"München",
"français",
]
assert tokens == expected
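    # Note that the "@myke" handle is absent from the expected tokens above
    # because the tokenizer was constructed with strip_handles=True.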
@pytest.mark.parametrize(
"test_input, expecteds",
[
(
"My text 0106404243030 is great text",
(
["My", "text", "01064042430", "30", "is", "great", "text"],
["My", "text", "0106404243030", "is", "great", "text"],
),
),
(
"My ticket id is 1234543124123",
(
["My", "ticket", "id", "is", "12345431241", "23"],
["My", "ticket", "id", "is", "1234543124123"],
),
),
(
"@remy: This is waaaaayyyy too much for you!!!!!! 01064042430",
(
[
":",
"This",
"is",
"waaayyy",
"too",
"much",
"for",
"you",
"!",
"!",
"!",
"01064042430",
],
[
":",
"This",
"is",
"waaayyy",
"too",
"much",
"for",
"you",
"!",
"!",
"!",
"01064042430",
],
),
),
# Further tests from https://github.com/nltk/nltk/pull/2798#issuecomment-922533085,
# showing the TweetTokenizer performance for `match_phone_numbers=True` and
# `match_phone_numbers=False`.
(
                # Some phone numbers are always tokenized, even with `match_phone_numbers=False`
"My number is 06-46124080, except it's not.",
(
[
"My",
"number",
"is",
"06-46124080",
",",
"except",
"it's",
"not",
".",
],
[
"My",
"number",
"is",
"06-46124080",
",",
"except",
"it's",
"not",
".",
],
),
),
(
# Phone number here is only tokenized correctly if `match_phone_numbers=True`
"My number is 601-984-4813, except it's not.",
(
[
"My",
"number",
"is",
"601-984-4813",
",",
"except",
"it's",
"not",
".",
],
[
"My",
"number",
"is",
"601-984-",
"4813",
",",
"except",
"it's",
"not",
".",
],
),
),
(
# Phone number here is only tokenized correctly if `match_phone_numbers=True`
"My number is (393) 928 -3010, except it's not.",
(
[
"My",
"number",
"is",
"(393) 928 -3010",
",",
"except",
"it's",
"not",
".",
],
[
"My",
"number",
"is",
"(",
"393",
")",
"928",
"-",
"3010",
",",
"except",
"it's",
"not",
".",
],
),
),
(
# A long number is tokenized correctly only if `match_phone_numbers=False`
"The product identification number is 48103284512.",
(
[
"The",
"product",
"identification",
"number",
"is",
"4810328451",
"2",
".",
],
[
"The",
"product",
"identification",
"number",
"is",
"48103284512",
".",
],
),
),
(
                # `match_phone_numbers=True` can have some unforeseen side effects:
                # here a subtraction is grouped as if it were a phone number.
"My favourite substraction is 240 - 1353.",
(
["My", "favourite", "substraction", "is", "240 - 1353", "."],
["My", "favourite", "substraction", "is", "240", "-", "1353", "."],
),
),
],
)
def test_tweet_tokenizer_expanded(
self, test_input: str, expecteds: Tuple[List[str], List[str]]
):
"""
Test `match_phone_numbers` in TweetTokenizer.
Note that TweetTokenizer is also passed the following for these tests:
* strip_handles=True
* reduce_len=True
:param test_input: The input string to tokenize using TweetTokenizer.
:type test_input: str
:param expecteds: A 2-tuple of tokenized sentences. The first of the two
            tokenized lists is the expected output of tokenization with `match_phone_numbers=True`.
The second of the two tokenized lists is the expected output of tokenization
with `match_phone_numbers=False`.
:type expecteds: Tuple[List[str], List[str]]
"""
for match_phone_numbers, expected in zip([True, False], expecteds):
tokenizer = TweetTokenizer(
strip_handles=True,
reduce_len=True,
match_phone_numbers=match_phone_numbers,
)
predicted = tokenizer.tokenize(test_input)
assert predicted == expected
def test_sonority_sequencing_syllable_tokenizer(self):
"""
Test SyllableTokenizer tokenizer.
"""
tokenizer = SyllableTokenizer()
tokens = tokenizer.tokenize("justification")
assert tokens == ["jus", "ti", "fi", "ca", "tion"]
def test_syllable_tokenizer_numbers(self):
"""
        Test SyllableTokenizer on a long string of digits.
"""
tokenizer = SyllableTokenizer()
text = "9" * 10000
tokens = tokenizer.tokenize(text)
assert tokens == [text]
def test_legality_principle_syllable_tokenizer(self):
"""
Test LegalitySyllableTokenizer tokenizer.
"""
from nltk.corpus import words
test_word = "wonderful"
tokenizer = LegalitySyllableTokenizer(words.words())
tokens = tokenizer.tokenize(test_word)
assert tokens == ["won", "der", "ful"]
@check_stanford_segmenter
def test_stanford_segmenter_arabic(self):
"""
Test the Stanford Word Segmenter for Arabic (default config)
"""
seg = StanfordSegmenter()
seg.default_config("ar")
sent = "يبحث علم الحاسوب استخدام الحوسبة بجميع اشكالها لحل المشكلات"
segmented_sent = seg.segment(sent.split())
assert segmented_sent.split() == [
"يبحث",
"علم",
"الحاسوب",
"استخدام",
"الحوسبة",
"ب",
"جميع",
"اشكال",
"ها",
"ل",
"حل",
"المشكلات",
]
@check_stanford_segmenter
def test_stanford_segmenter_chinese(self):
"""
Test the Stanford Word Segmenter for Chinese (default config)
"""
seg = StanfordSegmenter()
seg.default_config("zh")
sent = "这是斯坦福中文分词器测试"
segmented_sent = seg.segment(sent.split())
assert segmented_sent.split() == [
"",
"",
"斯坦福",
"中文",
"分词器",
"测试",
]
def test_phone_tokenizer(self):
"""
Test a string that resembles a phone number but contains a newline
"""
# Should be recognized as a phone number, albeit one with multiple spaces
tokenizer = TweetTokenizer()
test1 = "(393) 928 -3010"
expected = ["(393) 928 -3010"]
result = tokenizer.tokenize(test1)
assert result == expected
# Due to newline, first three elements aren't part of a phone number;
# fourth is
test2 = "(393)\n928 -3010"
expected = ["(", "393", ")", "928 -3010"]
result = tokenizer.tokenize(test2)
assert result == expected
def test_emoji_tokenizer(self):
"""
Test a string that contains Emoji ZWJ Sequences and skin tone modifier
"""
tokenizer = TweetTokenizer()
        # An emoji ZWJ sequence: the joined characters form a single emoji and should not be split.
test1 = "👨‍👩‍👧‍👧"
expected = ["👨‍👩‍👧‍👧"]
result = tokenizer.tokenize(test1)
assert result == expected
        # An emoji with a skin tone modifier: the two characters form a single emoji and should not be split.
test2 = "👨🏿"
expected = ["👨🏿"]
result = tokenizer.tokenize(test2)
assert result == expected
        # A string containing both skin tone modifiers and ZWJ sequences
test3 = "🤔 🙈 me así, se😌 ds 💕👭👙 hello 👩🏾‍🎓 emoji hello 👨‍👩‍👦‍👦 how are 😊 you today🙅🏽🙅🏽"
expected = [
"🤔",
"🙈",
"me",
"así",
",",
"se",
"😌",
"ds",
"💕",
"👭",
"👙",
"hello",
"👩🏾\u200d🎓",
"emoji",
"hello",
"👨\u200d👩\u200d👦\u200d👦",
"how",
"are",
"😊",
"you",
"today",
"🙅🏽",
"🙅🏽",
]
result = tokenizer.tokenize(test3)
assert result == expected
# emoji flag sequences, including enclosed letter pairs
# Expected behavior from #3034
test4 = "🇦🇵🇵🇱🇪"
expected = ["🇦🇵", "🇵🇱", "🇪"]
result = tokenizer.tokenize(test4)
assert result == expected
test5 = "Hi 🇨🇦, 😍!!"
expected = ["Hi", "🇨🇦", ",", "😍", "!", "!"]
result = tokenizer.tokenize(test5)
assert result == expected
test6 = "<3 🇨🇦 🤝 🇵🇱 <3"
expected = ["<3", "🇨🇦", "🤝", "🇵🇱", "<3"]
result = tokenizer.tokenize(test6)
assert result == expected
def test_pad_asterisk(self):
"""
Test padding of asterisk for word tokenization.
"""
text = "This is a, *weird sentence with *asterisks in it."
expected = [
"This",
"is",
"a",
",",
"*",
"weird",
"sentence",
"with",
"*",
"asterisks",
"in",
"it",
".",
]
assert word_tokenize(text) == expected
def test_pad_dotdot(self):
"""
Test padding of dotdot* for word tokenization.
"""
text = "Why did dotdot.. not get tokenized but dotdotdot... did? How about manydots....."
expected = [
"Why",
"did",
"dotdot",
"..",
"not",
"get",
"tokenized",
"but",
"dotdotdot",
"...",
"did",
"?",
"How",
"about",
"manydots",
".....",
]
assert word_tokenize(text) == expected
def test_remove_handle(self):
"""
Test remove_handle() from casual.py with specially crafted edge cases
"""
tokenizer = TweetTokenizer(strip_handles=True)
# Simple example. Handles with just numbers should be allowed
test1 = "@twitter hello @twi_tter_. hi @12345 @123news"
expected = ["hello", ".", "hi"]
result = tokenizer.tokenize(test1)
assert result == expected
# Handles are allowed to follow any of the following characters
test2 = "@n`@n~@n(@n)@n-@n=@n+@n\\@n|@n[@n]@n{@n}@n;@n:@n'@n\"@n/@n?@n.@n,@n<@n>@n @n\n@n ñ@n.ü@n.ç@n."
expected = [
"`",
"~",
"(",
")",
"-",
"=",
"+",
"\\",
"|",
"[",
"]",
"{",
"}",
";",
":",
"'",
'"',
"/",
"?",
".",
",",
"<",
">",
"ñ",
".",
"ü",
".",
"ç",
".",
]
result = tokenizer.tokenize(test2)
assert result == expected
# Handles are NOT allowed to follow any of the following characters
test3 = "a@n j@n z@n A@n L@n Z@n 1@n 4@n 7@n 9@n 0@n _@n !@n @@n #@n $@n %@n &@n *@n"
expected = [
"a",
"@n",
"j",
"@n",
"z",
"@n",
"A",
"@n",
"L",
"@n",
"Z",
"@n",
"1",
"@n",
"4",
"@n",
"7",
"@n",
"9",
"@n",
"0",
"@n",
"_",
"@n",
"!",
"@n",
"@",
"@n",
"#",
"@n",
"$",
"@n",
"%",
"@n",
"&",
"@n",
"*",
"@n",
]
result = tokenizer.tokenize(test3)
assert result == expected
# Handles are allowed to precede the following characters
test4 = "@n!a @n#a @n$a @n%a @n&a @n*a"
expected = ["!", "a", "#", "a", "$", "a", "%", "a", "&", "a", "*", "a"]
result = tokenizer.tokenize(test4)
assert result == expected
# Tests interactions with special symbols and multiple @
test5 = "@n!@n @n#@n @n$@n @n%@n @n&@n @n*@n @n@n @@n @n@@n @n_@n @n7@n @nj@n"
expected = [
"!",
"@n",
"#",
"@n",
"$",
"@n",
"%",
"@n",
"&",
"@n",
"*",
"@n",
"@n",
"@n",
"@",
"@n",
"@n",
"@",
"@n",
"@n_",
"@n",
"@n7",
"@n",
"@nj",
"@n",
]
result = tokenizer.tokenize(test5)
assert result == expected
# Tests that handles can have a max length of 15
test6 = "@abcdefghijklmnopqrstuvwxyz @abcdefghijklmno1234 @abcdefghijklmno_ @abcdefghijklmnoendofhandle"
expected = ["pqrstuvwxyz", "1234", "_", "endofhandle"]
result = tokenizer.tokenize(test6)
assert result == expected
# Edge case where an @ comes directly after a long handle
test7 = "@abcdefghijklmnop@abcde @abcdefghijklmno@abcde @abcdefghijklmno_@abcde @abcdefghijklmno5@abcde"
expected = [
"p",
"@abcde",
"@abcdefghijklmno",
"@abcde",
"_",
"@abcde",
"5",
"@abcde",
]
result = tokenizer.tokenize(test7)
assert result == expected
def test_treebank_span_tokenizer(self):
"""
Test TreebankWordTokenizer.span_tokenize function
"""
tokenizer = TreebankWordTokenizer()
# Test case in the docstring
test1 = "Good muffins cost $3.88\nin New (York). Please (buy) me\ntwo of them.\n(Thanks)."
expected = [
(0, 4),
(5, 12),
(13, 17),
(18, 19),
(19, 23),
(24, 26),
(27, 30),
(31, 32),
(32, 36),
(36, 37),
(37, 38),
(40, 46),
(47, 48),
(48, 51),
(51, 52),
(53, 55),
(56, 59),
(60, 62),
(63, 68),
(69, 70),
(70, 76),
(76, 77),
(77, 78),
]
result = list(tokenizer.span_tokenize(test1))
assert result == expected
# Test case with double quotation
test2 = 'The DUP is similar to the "religious right" in the United States and takes a hardline stance on social issues'
expected = [
(0, 3),
(4, 7),
(8, 10),
(11, 18),
(19, 21),
(22, 25),
(26, 27),
(27, 36),
(37, 42),
(42, 43),
(44, 46),
(47, 50),
(51, 57),
(58, 64),
(65, 68),
(69, 74),
(75, 76),
(77, 85),
(86, 92),
(93, 95),
(96, 102),
(103, 109),
]
result = list(tokenizer.span_tokenize(test2))
assert result == expected
        # Test case with double quotation marks as well as converted quotations
test3 = "The DUP is similar to the \"religious right\" in the United States and takes a ``hardline'' stance on social issues"
expected = [
(0, 3),
(4, 7),
(8, 10),
(11, 18),
(19, 21),
(22, 25),
(26, 27),
(27, 36),
(37, 42),
(42, 43),
(44, 46),
(47, 50),
(51, 57),
(58, 64),
(65, 68),
(69, 74),
(75, 76),
(77, 79),
(79, 87),
(87, 89),
(90, 96),
(97, 99),
(100, 106),
(107, 113),
]
result = list(tokenizer.span_tokenize(test3))
assert result == expected
def test_word_tokenize(self):
"""
Test word_tokenize function
"""
sentence = "The 'v', I've been fooled but I'll seek revenge."
expected = [
"The",
"'",
"v",
"'",
",",
"I",
"'ve",
"been",
"fooled",
"but",
"I",
"'ll",
"seek",
"revenge",
".",
]
assert word_tokenize(sentence) == expected
sentence = "'v' 're'"
expected = ["'", "v", "'", "'re", "'"]
assert word_tokenize(sentence) == expected
def test_punkt_pair_iter(self):
test_cases = [
("12", [("1", "2"), ("2", None)]),
("123", [("1", "2"), ("2", "3"), ("3", None)]),
("1234", [("1", "2"), ("2", "3"), ("3", "4"), ("4", None)]),
]
for test_input, expected_output in test_cases:
actual_output = [x for x in punkt._pair_iter(test_input)]
assert actual_output == expected_output
def test_punkt_pair_iter_handles_stop_iteration_exception(self):
# test input to trigger StopIteration from next()
it = iter([])
# call method under test and produce a generator
gen = punkt._pair_iter(it)
# unpack generator, ensure that no error is raised
list(gen)
def test_punkt_tokenize_words_handles_stop_iteration_exception(self):
obj = punkt.PunktBaseClass()
class TestPunktTokenizeWordsMock:
def word_tokenize(self, s):
return iter([])
obj._lang_vars = TestPunktTokenizeWordsMock()
# unpack generator, ensure that no error is raised
list(obj._tokenize_words("test"))
def test_punkt_tokenize_custom_lang_vars(self):
# Create LangVars including a full stop end character as used in Bengali
class BengaliLanguageVars(punkt.PunktLanguageVars):
sent_end_chars = (".", "?", "!", "\u0964")
obj = punkt.PunktSentenceTokenizer(lang_vars=BengaliLanguageVars())
# We now expect these sentences to be split up into the individual sentences
sentences = "উপরাষ্ট্রপতি শ্রী এম ভেঙ্কাইয়া নাইডু সোমবার আই আই টি দিল্লির হীরক জয়ন্তী উদযাপনের উদ্বোধন করেছেন। অনলাইনের মাধ্যমে এই অনুষ্ঠানে কেন্দ্রীয় মানব সম্পদ উন্নয়নমন্ত্রী শ্রী রমেশ পোখরিয়াল ‘নিশাঙ্ক’ উপস্থিত ছিলেন। এই উপলক্ষ্যে উপরাষ্ট্রপতি হীরকজয়ন্তীর লোগো এবং ২০৩০-এর জন্য প্রতিষ্ঠানের লক্ষ্য ও পরিকল্পনার নথি প্রকাশ করেছেন।"
expected = [
"উপরাষ্ট্রপতি শ্রী এম ভেঙ্কাইয়া নাইডু সোমবার আই আই টি দিল্লির হীরক জয়ন্তী উদযাপনের উদ্বোধন করেছেন।",
"অনলাইনের মাধ্যমে এই অনুষ্ঠানে কেন্দ্রীয় মানব সম্পদ উন্নয়নমন্ত্রী শ্রী রমেশ পোখরিয়াল ‘নিশাঙ্ক’ উপস্থিত ছিলেন।",
"এই উপলক্ষ্যে উপরাষ্ট্রপতি হীরকজয়ন্তীর লোগো এবং ২০৩০-এর জন্য প্রতিষ্ঠানের লক্ষ্য ও পরিকল্পনার নথি প্রকাশ করেছেন।",
]
assert obj.tokenize(sentences) == expected
def test_punkt_tokenize_no_custom_lang_vars(self):
obj = punkt.PunktSentenceTokenizer()
# We expect these sentences to not be split properly, as the Bengali full stop '।' is not included in the default language vars
sentences = "উপরাষ্ট্রপতি শ্রী এম ভেঙ্কাইয়া নাইডু সোমবার আই আই টি দিল্লির হীরক জয়ন্তী উদযাপনের উদ্বোধন করেছেন। অনলাইনের মাধ্যমে এই অনুষ্ঠানে কেন্দ্রীয় মানব সম্পদ উন্নয়নমন্ত্রী শ্রী রমেশ পোখরিয়াল ‘নিশাঙ্ক’ উপস্থিত ছিলেন। এই উপলক্ষ্যে উপরাষ্ট্রপতি হীরকজয়ন্তীর লোগো এবং ২০৩০-এর জন্য প্রতিষ্ঠানের লক্ষ্য ও পরিকল্পনার নথি প্রকাশ করেছেন।"
expected = [
"উপরাষ্ট্রপতি শ্রী এম ভেঙ্কাইয়া নাইডু সোমবার আই আই টি দিল্লির হীরক জয়ন্তী উদযাপনের উদ্বোধন করেছেন। অনলাইনের মাধ্যমে এই অনুষ্ঠানে কেন্দ্রীয় মানব সম্পদ উন্নয়নমন্ত্রী শ্রী রমেশ পোখরিয়াল ‘নিশাঙ্ক’ উপস্থিত ছিলেন। এই উপলক্ষ্যে উপরাষ্ট্রপতি হীরকজয়ন্তীর লোগো এবং ২০৩০-এর জন্য প্রতিষ্ঠানের লক্ষ্য ও পরিকল্পনার নথি প্রকাশ করেছেন।"
]
assert obj.tokenize(sentences) == expected
@pytest.mark.parametrize(
"input_text,n_sents,n_splits,lang_vars",
[
# Test debug_decisions on a text with two sentences, split by a dot.
("Subject: Some subject. Attachments: Some attachments", 2, 1),
# The sentence should be split into two sections,
# with one split and hence one decision.
# Test debug_decisions on a text with two sentences, split by an exclamation mark.
("Subject: Some subject! Attachments: Some attachments", 2, 1),
# The sentence should be split into two sections,
# with one split and hence one decision.
            # Test debug_decisions on a text with one sentence,
# which is not split.
("This is just a normal sentence, just like any other.", 1, 0),
            # Hence one sentence and zero splits.
],
)
def punkt_debug_decisions(self, input_text, n_sents, n_splits, lang_vars=None):
tokenizer = punkt.PunktSentenceTokenizer()
        if lang_vars is not None:
tokenizer._lang_vars = lang_vars
assert len(tokenizer.tokenize(input_text)) == n_sents
assert len(list(tokenizer.debug_decisions(input_text))) == n_splits
def test_punkt_debug_decisions_custom_end(self):
# Test debug_decisions on a text with two sentences,
# split by a custom end character, based on Issue #2519
class ExtLangVars(punkt.PunktLanguageVars):
sent_end_chars = (".", "?", "!", "^")
self.punkt_debug_decisions(
"Subject: Some subject^ Attachments: Some attachments",
n_sents=2,
n_splits=1,
lang_vars=ExtLangVars(),
)
# The sentence should be split into two sections,
# with one split and hence one decision.
@pytest.mark.parametrize(
"sentences, expected",
[
(
"this is a test. . new sentence.",
["this is a test.", ".", "new sentence."],
),
("This. . . That", ["This.", ".", ".", "That"]),
("This..... That", ["This..... That"]),
("This... That", ["This... That"]),
("This.. . That", ["This.. .", "That"]),
("This. .. That", ["This.", ".. That"]),
("This. ,. That", ["This.", ",.", "That"]),
("This!!! That", ["This!!!", "That"]),
("This! That", ["This!", "That"]),
(
"1. This is R .\n2. This is A .\n3. That's all",
["1.", "This is R .", "2.", "This is A .", "3.", "That's all"],
),
(
"1. This is R .\t2. This is A .\t3. That's all",
["1.", "This is R .", "2.", "This is A .", "3.", "That's all"],
),
("Hello.\tThere", ["Hello.", "There"]),
],
)
def test_sent_tokenize(self, sentences: str, expected: List[str]):
assert sent_tokenize(sentences) == expected
def test_string_tokenizer(self) -> None:
sentence = "Hello there"
tokenizer = CharTokenizer()
assert tokenizer.tokenize(sentence) == list(sentence)
assert list(tokenizer.span_tokenize(sentence)) == [
(0, 1),
(1, 2),
(2, 3),
(3, 4),
(4, 5),
(5, 6),
(6, 7),
(7, 8),
(8, 9),
(9, 10),
(10, 11),
]
class TestPunktTrainer:
def test_punkt_train(self) -> None:
trainer = punkt.PunktTrainer()
trainer.train("This is a test.")
def test_punkt_train_single_word(self) -> None:
trainer = punkt.PunktTrainer()
trainer.train("This.")
def test_punkt_train_no_punc(self) -> None:
trainer = punkt.PunktTrainer()
trainer.train("This is a test")


@@ -0,0 +1,77 @@
"""
Tests for static parts of Twitter package
"""
import os
import pytest
pytest.importorskip("twython")
from nltk.twitter import Authenticate
@pytest.fixture
def auth():
return Authenticate()
class TestCredentials:
"""
Tests that Twitter credentials from a file are handled correctly.
"""
@classmethod
def setup_class(self):
self.subdir = os.path.join(os.path.dirname(__file__), "files")
os.environ["TWITTER"] = "twitter-files"
def test_environment(self, auth):
"""
Test that environment variable has been read correctly.
"""
fn = os.path.basename(auth.creds_subdir)
assert fn == os.environ["TWITTER"]
@pytest.mark.parametrize(
"kwargs",
[
# Each of the following scenarios should raise an error:
# An empty subdir path
{"subdir": ""},
# A subdir path of None
{"subdir": None},
# A nonexistent directory
{"subdir": "/nosuchdir"},
# 'credentials.txt' is not in default subdir, as read from `os.environ['TWITTER']`
{},
# Nonexistent credentials file ('foobar')
{"creds_file": "foobar"},
# 'bad_oauth1-1.txt' is incomplete
{"creds_file": "bad_oauth1-1.txt"},
# The first key in credentials file 'bad_oauth1-2.txt' is ill-formed
{"creds_file": "bad_oauth1-2.txt"},
# The first two lines in 'bad_oauth1-3.txt' are collapsed
{"creds_file": "bad_oauth1-3.txt"},
],
)
def test_scenarios_that_should_raise_errors(self, kwargs, auth):
"""Various scenarios that should raise errors"""
try:
auth.load_creds(**kwargs)
# raises ValueError (zero length field name in format) for python 2.6
# OSError for the rest
except (OSError, ValueError):
pass
except Exception as e:
pytest.fail("Unexpected exception thrown: %s" % e)
else:
pytest.fail("OSError exception not thrown.")
def test_correct_file(self, auth):
"""Test that a proper file succeeds and is read correctly"""
oauth = auth.load_creds(subdir=self.subdir)
assert auth.creds_fullpath == os.path.join(self.subdir, auth.creds_file)
assert auth.creds_file == "credentials.txt"
assert oauth["app_key"] == "a"
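
# A minimal, hedged usage sketch (not collected by pytest): loading credentials
# from an explicit directory, mirroring test_correct_file above. It assumes
# twython is installed and that the chosen directory contains a well-formed
# 'credentials.txt'; the path used here is purely illustrative.
if __name__ == "__main__":
    import os
    from nltk.twitter import Authenticate

    auth = Authenticate()
    # Without a subdir argument, load_creds() falls back to the directory named
    # by the TWITTER environment variable.
    oauth = auth.load_creds(subdir=os.path.expanduser("~/twitter-files"))
    print(auth.creds_fullpath, sorted(oauth))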


@@ -0,0 +1,82 @@
import pytest
from nltk.util import everygrams
@pytest.fixture
def everygram_input():
"""Form test data for tests."""
return iter(["a", "b", "c"])
def test_everygrams_without_padding(everygram_input):
expected_output = [
("a",),
("a", "b"),
("a", "b", "c"),
("b",),
("b", "c"),
("c",),
]
output = list(everygrams(everygram_input))
assert output == expected_output
def test_everygrams_max_len(everygram_input):
expected_output = [
("a",),
("a", "b"),
("b",),
("b", "c"),
("c",),
]
output = list(everygrams(everygram_input, max_len=2))
assert output == expected_output
def test_everygrams_min_len(everygram_input):
expected_output = [
("a", "b"),
("a", "b", "c"),
("b", "c"),
]
output = list(everygrams(everygram_input, min_len=2))
assert output == expected_output
def test_everygrams_pad_right(everygram_input):
expected_output = [
("a",),
("a", "b"),
("a", "b", "c"),
("b",),
("b", "c"),
("b", "c", None),
("c",),
("c", None),
("c", None, None),
(None,),
(None, None),
(None,),
]
output = list(everygrams(everygram_input, max_len=3, pad_right=True))
assert output == expected_output
def test_everygrams_pad_left(everygram_input):
expected_output = [
(None,),
(None, None),
(None, None, "a"),
(None,),
(None, "a"),
(None, "a", "b"),
("a",),
("a", "b"),
("a", "b", "c"),
("b",),
("b", "c"),
("c",),
]
output = list(everygrams(everygram_input, max_len=3, pad_left=True))
assert output == expected_output
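
# A minimal, hedged usage sketch (not collected by pytest): the same
# everygrams() switches exercised above, applied to the same three tokens.
if __name__ == "__main__":
    from nltk.util import everygrams

    tokens = ["a", "b", "c"]
    print(list(everygrams(tokens)))                             # all 1- to 3-grams
    print(list(everygrams(tokens, max_len=2)))                  # capped at bigrams
    print(list(everygrams(tokens, min_len=2)))                  # bigrams and longer
    print(list(everygrams(tokens, max_len=3, pad_right=True)))  # right-padded with None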


@@ -0,0 +1,292 @@
"""
Unit tests for nltk.corpus.wordnet
See also nltk/test/wordnet.doctest
"""
import unittest
from nltk.corpus import wordnet as wn
from nltk.corpus import wordnet_ic as wnic
wn.ensure_loaded()
S = wn.synset
L = wn.lemma
class WordNetDemo(unittest.TestCase):
def test_retrieve_synset(self):
move_synset = S("go.v.21")
self.assertEqual(move_synset.name(), "move.v.15")
self.assertEqual(move_synset.lemma_names(), ["move", "go"])
self.assertEqual(
move_synset.definition(), "have a turn; make one's move in a game"
)
self.assertEqual(move_synset.examples(), ["Can I go now?"])
def test_retrieve_synsets(self):
self.assertEqual(sorted(wn.synsets("zap", pos="n")), [S("zap.n.01")])
self.assertEqual(
sorted(wn.synsets("zap", pos="v")),
[S("microwave.v.01"), S("nuke.v.01"), S("zap.v.01"), S("zap.v.02")],
)
def test_hyperhyponyms(self):
        # Not every synset has hypernyms().
self.assertEqual(S("travel.v.01").hypernyms(), [])
self.assertEqual(S("travel.v.02").hypernyms(), [S("travel.v.03")])
self.assertEqual(S("travel.v.03").hypernyms(), [])
# Test hyper-/hyponyms.
self.assertEqual(S("breakfast.n.1").hypernyms(), [S("meal.n.01")])
first_five_meal_hypo = [
S("banquet.n.02"),
S("bite.n.04"),
S("breakfast.n.01"),
S("brunch.n.01"),
S("buffet.n.02"),
]
self.assertEqual(sorted(S("meal.n.1").hyponyms())[:5], first_five_meal_hypo)
self.assertEqual(S("Austen.n.1").instance_hypernyms(), [S("writer.n.01")])
first_five_composer_hypo = [
S("ambrose.n.01"),
S("bach.n.01"),
S("barber.n.01"),
S("bartok.n.01"),
S("beethoven.n.01"),
]
self.assertEqual(
sorted(S("composer.n.1").instance_hyponyms())[:5], first_five_composer_hypo
)
# Test root hyper-/hyponyms
self.assertEqual(S("person.n.01").root_hypernyms(), [S("entity.n.01")])
self.assertEqual(S("sail.v.01").root_hypernyms(), [S("travel.v.01")])
self.assertEqual(
sorted(S("fall.v.12").root_hypernyms()), [S("act.v.01"), S("fall.v.17")]
)
def test_derivationally_related_forms(self):
# Test `derivationally_related_forms()`
self.assertEqual(
L("zap.v.03.nuke").derivationally_related_forms(),
[L("atomic_warhead.n.01.nuke")],
)
self.assertEqual(
L("zap.v.03.atomize").derivationally_related_forms(),
[L("atomization.n.02.atomization")],
)
self.assertEqual(
L("zap.v.03.atomise").derivationally_related_forms(),
[L("atomization.n.02.atomisation")],
)
self.assertEqual(L("zap.v.03.zap").derivationally_related_forms(), [])
def test_meronyms_holonyms(self):
# Test meronyms, holonyms.
self.assertEqual(
sorted(S("dog.n.01").member_holonyms()), [S("canis.n.01"), S("pack.n.06")]
)
self.assertEqual(S("dog.n.01").part_meronyms(), [S("flag.n.07")])
self.assertEqual(S("faculty.n.2").member_meronyms(), [S("professor.n.01")])
self.assertEqual(S("copilot.n.1").member_holonyms(), [S("crew.n.01")])
self.assertEqual(
sorted(S("table.n.2").part_meronyms()),
[S("leg.n.03"), S("tabletop.n.01"), S("tableware.n.01")],
)
self.assertEqual(S("course.n.7").part_holonyms(), [S("meal.n.01")])
self.assertEqual(
sorted(S("water.n.1").substance_meronyms()),
[S("hydrogen.n.01"), S("oxygen.n.01")],
)
self.assertEqual(
sorted(S("gin.n.1").substance_holonyms()),
[
S("gin_and_it.n.01"),
S("gin_and_tonic.n.01"),
S("martini.n.01"),
S("pink_lady.n.01"),
],
)
def test_antonyms(self):
# Test antonyms.
self.assertEqual(
L("leader.n.1.leader").antonyms(), [L("follower.n.01.follower")]
)
self.assertEqual(
L("increase.v.1.increase").antonyms(), [L("decrease.v.01.decrease")]
)
def test_misc_relations(self):
# Test misc relations.
self.assertEqual(S("snore.v.1").entailments(), [S("sleep.v.01")])
self.assertEqual(
sorted(S("heavy.a.1").similar_tos()),
[
S("dense.s.03"),
S("doughy.s.01"),
S("heavier-than-air.s.01"),
S("hefty.s.02"),
S("massive.s.04"),
S("non-buoyant.s.01"),
S("ponderous.s.02"),
],
)
self.assertEqual(S("light.a.1").attributes(), [S("weight.n.01")])
self.assertEqual(S("heavy.a.1").attributes(), [S("weight.n.01")])
# Test pertainyms.
self.assertEqual(
L("English.a.1.English").pertainyms(), [L("england.n.01.England")]
)
def test_lch(self):
# Test LCH.
self.assertEqual(
S("person.n.01").lowest_common_hypernyms(S("dog.n.01")),
[S("organism.n.01")],
)
self.assertEqual(
S("woman.n.01").lowest_common_hypernyms(S("girlfriend.n.02")),
[S("woman.n.01")],
)
def test_domains(self):
# Test domains.
self.assertEqual(S("code.n.03").topic_domains(), [S("computer_science.n.01")])
self.assertEqual(S("pukka.a.01").region_domains(), [S("india.n.01")])
self.assertEqual(S("freaky.a.01").usage_domains(), [S("slang.n.02")])
def test_in_topic_domains(self):
# Test in domains.
self.assertEqual(
sorted(S("computer_science.n.01").in_topic_domains())[0], S("access.n.05")
)
self.assertEqual(
sorted(S("germany.n.01").in_region_domains())[23], S("trillion.n.02")
)
self.assertEqual(
sorted(S("slang.n.02").in_usage_domains())[1], S("airhead.n.01")
)
def test_wordnet_similarities(self):
# Path based similarities.
self.assertAlmostEqual(S("cat.n.01").path_similarity(S("cat.n.01")), 1.0)
self.assertAlmostEqual(S("dog.n.01").path_similarity(S("cat.n.01")), 0.2)
self.assertAlmostEqual(
S("car.n.01").path_similarity(S("automobile.v.01")),
S("automobile.v.01").path_similarity(S("car.n.01")),
)
self.assertAlmostEqual(
S("big.a.01").path_similarity(S("dog.n.01")),
S("dog.n.01").path_similarity(S("big.a.01")),
)
self.assertAlmostEqual(
S("big.a.01").path_similarity(S("long.a.01")),
S("long.a.01").path_similarity(S("big.a.01")),
)
self.assertAlmostEqual(
S("dog.n.01").lch_similarity(S("cat.n.01")), 2.028, places=3
)
self.assertAlmostEqual(
S("dog.n.01").wup_similarity(S("cat.n.01")), 0.8571, places=3
)
self.assertAlmostEqual(
S("car.n.01").wup_similarity(S("automobile.v.01")),
S("automobile.v.01").wup_similarity(S("car.n.01")),
)
self.assertAlmostEqual(
S("big.a.01").wup_similarity(S("dog.n.01")),
S("dog.n.01").wup_similarity(S("big.a.01")),
)
self.assertAlmostEqual(
S("big.a.01").wup_similarity(S("long.a.01")),
S("long.a.01").wup_similarity(S("big.a.01")),
)
self.assertAlmostEqual(
S("big.a.01").lch_similarity(S("long.a.01")),
S("long.a.01").lch_similarity(S("big.a.01")),
)
# Information Content similarities.
brown_ic = wnic.ic("ic-brown.dat")
self.assertAlmostEqual(
S("dog.n.01").jcn_similarity(S("cat.n.01"), brown_ic), 0.4497, places=3
)
semcor_ic = wnic.ic("ic-semcor.dat")
self.assertAlmostEqual(
S("dog.n.01").lin_similarity(S("cat.n.01"), semcor_ic), 0.8863, places=3
)
def test_omw_lemma_no_trailing_underscore(self):
expected = sorted(
[
"popolna_sprememba_v_mišljenju",
"popoln_obrat",
"preobrat",
"preobrat_v_mišljenju",
]
)
self.assertEqual(sorted(S("about-face.n.02").lemma_names(lang="slv")), expected)
def test_iterable_type_for_all_lemma_names(self):
# Duck-test for iterables.
# See https://stackoverflow.com/a/36230057/610569
cat_lemmas = wn.all_lemma_names(lang="cat")
eng_lemmas = wn.all_lemma_names(lang="eng")
self.assertTrue(hasattr(eng_lemmas, "__iter__"))
self.assertTrue(hasattr(eng_lemmas, "__next__") or hasattr(eng_lemmas, "next"))
self.assertTrue(eng_lemmas.__iter__() is eng_lemmas)
self.assertTrue(hasattr(cat_lemmas, "__iter__"))
        self.assertTrue(hasattr(cat_lemmas, "__next__") or hasattr(cat_lemmas, "next"))
self.assertTrue(cat_lemmas.__iter__() is cat_lemmas)
def test_en_ptb_tags(self):
# Common PTB tags (mapped in both PTB and Brown)
self.assertEqual(wn.tag2pos("NN"), "n") # noun
self.assertEqual(wn.tag2pos("VB"), "v") # verb
self.assertEqual(wn.tag2pos("JJ"), "a") # adjective
self.assertEqual(wn.tag2pos("RB"), "r") # adverb
# PTB-specific tags (mapped in PTB, not in Brown)
self.assertEqual(wn.tag2pos("NNS"), "n") # plural noun (PTB only)
self.assertEqual(wn.tag2pos("VBD"), "v") # verb, past tense (PTB only)
self.assertEqual(
wn.tag2pos("VBG"), "v"
) # verb, gerund/present participle (PTB only)
self.assertEqual(wn.tag2pos("JJR"), "a") # adjective, comparative (PTB only)
self.assertEqual(wn.tag2pos("RBR"), "r") # adverb, comparative (PTB only)
# Tags that should yield None (not mapped in WordNet)
self.assertIsNone(wn.tag2pos("PRP"))
self.assertIsNone(wn.tag2pos("WP"))
self.assertIsNone(wn.tag2pos("TO"))
self.assertIsNone(wn.tag2pos("PRT"))
self.assertIsNone(wn.tag2pos("POS"))
self.assertIsNone(wn.tag2pos("."))
def test_en_brown_tags(self):
# Common Brown tags (mapped in both PTB and Brown)
self.assertEqual(wn.tag2pos("NN", tagset="en-brown"), "n") # noun
self.assertEqual(wn.tag2pos("VB", tagset="en-brown"), "v") # verb
self.assertEqual(wn.tag2pos("JJ", tagset="en-brown"), "a") # adjective
self.assertEqual(wn.tag2pos("RB", tagset="en-brown"), "r") # adverb
# Brown-specific tags (mapped in Brown, not in PTB)
self.assertEqual(
wn.tag2pos("HV", tagset="en-brown"), "v"
) # 'have' auxiliary (Brown only)
self.assertEqual(
wn.tag2pos("BEZ", tagset="en-brown"), "v"
) # 'be' auxiliary, 3rd person singular present (Brown only)
self.assertEqual(
wn.tag2pos("DOZ", tagset="en-brown"), "v"
) # 'do' auxiliary, 3rd person singular present (Brown only)
# Tags that should yield None (not mapped in WordNet)
self.assertIsNone(wn.tag2pos("PPL", tagset="en-brown"))
self.assertIsNone(wn.tag2pos("(", tagset="en-brown"))


@@ -0,0 +1,416 @@
"""
Tests for BLEU translation evaluation metric
"""
import unittest
import numpy as np
from nltk.data import find
from nltk.translate.bleu_score import (
SmoothingFunction,
brevity_penalty,
closest_ref_length,
corpus_bleu,
modified_precision,
sentence_bleu,
)
class TestBLEU(unittest.TestCase):
def test_modified_precision(self):
"""
Examples from the original BLEU paper
https://www.aclweb.org/anthology/P02-1040.pdf
"""
# Example 1: the "the*" example.
# Reference sentences.
ref1 = "the cat is on the mat".split()
ref2 = "there is a cat on the mat".split()
# Hypothesis sentence(s).
hyp1 = "the the the the the the the".split()
references = [ref1, ref2]
# Testing modified unigram precision.
hyp1_unigram_precision = float(modified_precision(references, hyp1, n=1))
assert round(hyp1_unigram_precision, 4) == 0.2857
# With assertAlmostEqual at 4 place precision.
self.assertAlmostEqual(hyp1_unigram_precision, 0.28571428, places=4)
# Testing modified bigram precision.
assert float(modified_precision(references, hyp1, n=2)) == 0.0
# Example 2: the "of the" example.
# Reference sentences
ref1 = str(
"It is a guide to action that ensures that the military "
"will forever heed Party commands"
).split()
ref2 = str(
"It is the guiding principle which guarantees the military "
"forces always being under the command of the Party"
).split()
ref3 = str(
"It is the practical guide for the army always to heed "
"the directions of the party"
).split()
# Hypothesis sentence(s).
hyp1 = "of the".split()
references = [ref1, ref2, ref3]
# Testing modified unigram precision.
assert float(modified_precision(references, hyp1, n=1)) == 1.0
# Testing modified bigram precision.
assert float(modified_precision(references, hyp1, n=2)) == 1.0
# Example 3: Proper MT outputs.
hyp1 = str(
"It is a guide to action which ensures that the military "
"always obeys the commands of the party"
).split()
hyp2 = str(
"It is to insure the troops forever hearing the activity "
"guidebook that party direct"
).split()
references = [ref1, ref2, ref3]
# Unigram precision.
hyp1_unigram_precision = float(modified_precision(references, hyp1, n=1))
hyp2_unigram_precision = float(modified_precision(references, hyp2, n=1))
# Test unigram precision with assertAlmostEqual at 4 place precision.
self.assertAlmostEqual(hyp1_unigram_precision, 0.94444444, places=4)
self.assertAlmostEqual(hyp2_unigram_precision, 0.57142857, places=4)
# Test unigram precision with rounding.
assert round(hyp1_unigram_precision, 4) == 0.9444
assert round(hyp2_unigram_precision, 4) == 0.5714
# Bigram precision
hyp1_bigram_precision = float(modified_precision(references, hyp1, n=2))
hyp2_bigram_precision = float(modified_precision(references, hyp2, n=2))
# Test bigram precision with assertAlmostEqual at 4 place precision.
self.assertAlmostEqual(hyp1_bigram_precision, 0.58823529, places=4)
self.assertAlmostEqual(hyp2_bigram_precision, 0.07692307, places=4)
# Test bigram precision with rounding.
assert round(hyp1_bigram_precision, 4) == 0.5882
assert round(hyp2_bigram_precision, 4) == 0.0769
def test_brevity_penalty(self):
# Test case from brevity_penalty_closest function in mteval-v13a.pl.
# Same test cases as in the doctest in nltk.translate.bleu_score.py
references = [["a"] * 11, ["a"] * 8]
hypothesis = ["a"] * 7
hyp_len = len(hypothesis)
closest_ref_len = closest_ref_length(references, hyp_len)
self.assertAlmostEqual(
brevity_penalty(closest_ref_len, hyp_len), 0.8669, places=4
)
references = [["a"] * 11, ["a"] * 8, ["a"] * 6, ["a"] * 7]
hypothesis = ["a"] * 7
hyp_len = len(hypothesis)
closest_ref_len = closest_ref_length(references, hyp_len)
assert brevity_penalty(closest_ref_len, hyp_len) == 1.0
def test_zero_matches(self):
        # Test case where there are 0 matches.
references = ["The candidate has no alignment to any of the references".split()]
hypothesis = "John loves Mary".split()
# Test BLEU to nth order of n-grams, where n is len(hypothesis).
for n in range(1, len(hypothesis)):
weights = (1.0 / n,) * n # Uniform weights.
assert sentence_bleu(references, hypothesis, weights) == 0
def test_full_matches(self):
        # Test case where there are 100% matches.
references = ["John loves Mary".split()]
hypothesis = "John loves Mary".split()
# Test BLEU to nth order of n-grams, where n is len(hypothesis).
for n in range(1, len(hypothesis)):
weights = (1.0 / n,) * n # Uniform weights.
assert sentence_bleu(references, hypothesis, weights) == 1.0
def test_partial_matches_hypothesis_longer_than_reference(self):
references = ["John loves Mary".split()]
hypothesis = "John loves Mary who loves Mike".split()
        # Since no 4-gram matches were found, the result should be zero:
        # exp(w_1*log(p_1) + w_2*log(p_2) + w_3*log(p_3) + w_4*(-inf)) = 0
self.assertAlmostEqual(sentence_bleu(references, hypothesis), 0.0, places=4)
# Checks that the warning has been raised because len(reference) < 4.
try:
self.assertWarns(UserWarning, sentence_bleu, references, hypothesis)
except AttributeError:
pass # unittest.TestCase.assertWarns is only supported in Python >= 3.2.
# @unittest.skip("Skipping fringe cases for BLEU.")
class TestBLEUFringeCases(unittest.TestCase):
def test_case_where_n_is_bigger_than_hypothesis_length(self):
# Test BLEU to nth order of n-grams, where n > len(hypothesis).
references = ["John loves Mary ?".split()]
hypothesis = "John loves Mary".split()
        n = len(hypothesis) + 1  # n is deliberately larger than the hypothesis length.
weights = (1.0 / n,) * n # Uniform weights.
        # Since no n-gram matches were found, the result should be zero:
        # exp(w_1*log(p_1) + ... + w_n*(-inf)) = 0
self.assertAlmostEqual(
sentence_bleu(references, hypothesis, weights), 0.0, places=4
)
# Checks that the warning has been raised because len(hypothesis) < 4.
try:
self.assertWarns(UserWarning, sentence_bleu, references, hypothesis)
except AttributeError:
pass # unittest.TestCase.assertWarns is only supported in Python >= 3.2.
# Test case where n > len(hypothesis) but so is n > len(reference), and
# it's a special case where reference == hypothesis.
references = ["John loves Mary".split()]
hypothesis = "John loves Mary".split()
        # Since no 4-gram matches were found, the result should be zero:
        # exp(w_1*log(p_1) + w_2*log(p_2) + w_3*log(p_3) + w_4*(-inf)) = 0
self.assertAlmostEqual(
sentence_bleu(references, hypothesis, weights), 0.0, places=4
)
def test_empty_hypothesis(self):
        # Test case where the hypothesis is empty.
references = ["The candidate has no alignment to any of the references".split()]
hypothesis = []
assert sentence_bleu(references, hypothesis) == 0
def test_length_one_hypothesis(self):
        # Test case where the hypothesis has length 1, using smoothing method 4.
references = ["The candidate has no alignment to any of the references".split()]
hypothesis = ["Foo"]
method4 = SmoothingFunction().method4
try:
sentence_bleu(references, hypothesis, smoothing_function=method4)
except ValueError:
            pass  # Smoothing method 4 may raise a ValueError for a one-word hypothesis.
def test_empty_references(self):
        # Test case where the reference is empty.
references = [[]]
hypothesis = "John loves Mary".split()
assert sentence_bleu(references, hypothesis) == 0
def test_empty_references_and_hypothesis(self):
        # Test case where both the references and the hypothesis are empty.
references = [[]]
hypothesis = []
assert sentence_bleu(references, hypothesis) == 0
def test_reference_or_hypothesis_shorter_than_fourgrams(self):
# Test case where the length of reference or hypothesis
# is shorter than 4.
references = ["let it go".split()]
hypothesis = "let go it".split()
        # Checks that the score for this hypothesis and reference pair is 0.0:
        # exp(w_1*log(p_1) + w_2*log(p_2) + w_3*log(p_3) + w_4*(-inf)) = 0
self.assertAlmostEqual(sentence_bleu(references, hypothesis), 0.0, places=4)
# Checks that the warning has been raised.
try:
self.assertWarns(UserWarning, sentence_bleu, references, hypothesis)
except AttributeError:
pass # unittest.TestCase.assertWarns is only supported in Python >= 3.2.
def test_numpy_weights(self):
        # Test case where there are 0 matches.
references = ["The candidate has no alignment to any of the references".split()]
hypothesis = "John loves Mary".split()
weights = np.array([0.25] * 4)
assert sentence_bleu(references, hypothesis, weights) == 0
class TestBLEUvsMteval13a(unittest.TestCase):
def test_corpus_bleu(self):
ref_file = find("models/wmt15_eval/ref.ru")
hyp_file = find("models/wmt15_eval/google.ru")
mteval_output_file = find("models/wmt15_eval/mteval-13a.output")
# Reads the BLEU scores from the `mteval-13a.output` file.
# The order of the list corresponds to the order of the ngrams.
with open(mteval_output_file) as mteval_fin:
            # The scores are on the second-to-last line of the file.
            # The first and last items on that line are not n-gram scores, so they are sliced off.
mteval_bleu_scores = map(float, mteval_fin.readlines()[-2].split()[1:-1])
with open(ref_file, encoding="utf8") as ref_fin:
with open(hyp_file, encoding="utf8") as hyp_fin:
# Whitespace tokenize the file.
                # Note: split() with no arguments also strips surrounding whitespace.
hypothesis = list(map(lambda x: x.split(), hyp_fin))
# Note that the corpus_bleu input is list of list of references.
references = list(map(lambda x: [x.split()], ref_fin))
# Without smoothing.
for i, mteval_bleu in zip(range(1, 10), mteval_bleu_scores):
nltk_bleu = corpus_bleu(
references, hypothesis, weights=(1.0 / i,) * i
)
                    # Check that the difference between the BLEU scores is less than 0.005.
# Note: This is an approximate comparison; as much as
# +/- 0.01 BLEU might be "statistically significant",
# the actual translation quality might not be.
assert abs(mteval_bleu - nltk_bleu) < 0.005
# With the same smoothing method used in mteval-v13a.pl
chencherry = SmoothingFunction()
for i, mteval_bleu in zip(range(1, 10), mteval_bleu_scores):
nltk_bleu = corpus_bleu(
references,
hypothesis,
weights=(1.0 / i,) * i,
smoothing_function=chencherry.method3,
)
assert abs(mteval_bleu - nltk_bleu) < 0.005
class TestBLEUWithBadSentence(unittest.TestCase):
def test_corpus_bleu_with_bad_sentence(self):
hyp = "Teo S yb , oe uNb , R , T t , , t Tue Ar saln S , , 5istsi l , 5oe R ulO sae oR R"
ref = str(
"Their tasks include changing a pump on the faulty stokehold ."
"Likewise , two species that are very similar in morphology "
"were distinguished using genetics ."
)
references = [[ref.split()]]
hypotheses = [hyp.split()]
        try:  # Check that the warning is raised since the no. of 2-gram overlaps is 0.
with self.assertWarns(UserWarning):
                # Verify that the BLEU output degenerates to 0.0 since there are no 2-gram overlaps.
self.assertAlmostEqual(
corpus_bleu(references, hypotheses), 0.0, places=4
)
except (
AttributeError
): # unittest.TestCase.assertWarns is only supported in Python >= 3.2.
self.assertAlmostEqual(corpus_bleu(references, hypotheses), 0.0, places=4)
class TestBLEUWithMultipleWeights(unittest.TestCase):
def test_corpus_bleu_with_multiple_weights(self):
hyp1 = [
"It",
"is",
"a",
"guide",
"to",
"action",
"which",
"ensures",
"that",
"the",
"military",
"always",
"obeys",
"the",
"commands",
"of",
"the",
"party",
]
ref1a = [
"It",
"is",
"a",
"guide",
"to",
"action",
"that",
"ensures",
"that",
"the",
"military",
"will",
"forever",
"heed",
"Party",
"commands",
]
ref1b = [
"It",
"is",
"the",
"guiding",
"principle",
"which",
"guarantees",
"the",
"military",
"forces",
"always",
"being",
"under",
"the",
"command",
"of",
"the",
"Party",
]
ref1c = [
"It",
"is",
"the",
"practical",
"guide",
"for",
"the",
"army",
"always",
"to",
"heed",
"the",
"directions",
"of",
"the",
"party",
]
hyp2 = [
"he",
"read",
"the",
"book",
"because",
"he",
"was",
"interested",
"in",
"world",
"history",
]
ref2a = [
"he",
"was",
"interested",
"in",
"world",
"history",
"because",
"he",
"read",
"the",
"book",
]
weight_1 = (1, 0, 0, 0)
weight_2 = (0.25, 0.25, 0.25, 0.25)
weight_3 = (0, 0, 0, 0, 1)
bleu_scores = corpus_bleu(
list_of_references=[[ref1a, ref1b, ref1c], [ref2a]],
hypotheses=[hyp1, hyp2],
weights=[weight_1, weight_2, weight_3],
)
assert bleu_scores[0] == corpus_bleu(
[[ref1a, ref1b, ref1c], [ref2a]], [hyp1, hyp2], weight_1
)
assert bleu_scores[1] == corpus_bleu(
[[ref1a, ref1b, ref1c], [ref2a]], [hyp1, hyp2], weight_2
)
assert bleu_scores[2] == corpus_bleu(
[[ref1a, ref1b, ref1c], [ref2a]], [hyp1, hyp2], weight_3
)
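
# A minimal, hedged usage sketch (not collected by pytest): sentence-level BLEU
# for the "proper MT output" hypothesis from the paper example above, with and
# without the Chen & Cherry smoothing used elsewhere in this module.
if __name__ == "__main__":
    from nltk.translate.bleu_score import SmoothingFunction, sentence_bleu

    reference = (
        "It is a guide to action that ensures that the military "
        "will forever heed Party commands"
    ).split()
    hypothesis = (
        "It is a guide to action which ensures that the military "
        "always obeys the commands of the party"
    ).split()
    print(sentence_bleu([reference], hypothesis))
    print(
        sentence_bleu(
            [reference], hypothesis, smoothing_function=SmoothingFunction().method3
        )
    )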


@@ -0,0 +1,154 @@
"""
Tests GDFA alignments
"""
import unittest
from nltk.translate.gdfa import grow_diag_final_and
class TestGDFA(unittest.TestCase):
def test_from_eflomal_outputs(self):
"""
        Testing GDFA with the first 10 eflomal outputs from issue #1829
https://github.com/nltk/nltk/issues/1829
"""
# Input.
forwards = [
"0-0 1-2",
"0-0 1-1",
"0-0 2-1 3-2 4-3 5-4 6-5 7-6 8-7 7-8 9-9 10-10 9-11 11-12 12-13 13-14",
"0-0 1-1 1-2 2-3 3-4 4-5 4-6 5-7 6-8 8-9 9-10",
"0-0 14-1 15-2 16-3 20-5 21-6 22-7 5-8 6-9 7-10 8-11 9-12 10-13 11-14 12-15 13-16 14-17 17-18 18-19 19-20 20-21 23-22 24-23 25-24 26-25 27-27 28-28 29-29 30-30 31-31",
"0-0 1-1 0-2 2-3",
"0-0 2-2 4-4",
"0-0 1-1 2-3 3-4 5-5 7-6 8-7 9-8 10-9 11-10 12-11 13-12 14-13 15-14 16-16 17-17 18-18 19-19 20-20",
"3-0 4-1 6-2 5-3 6-4 7-5 8-6 9-7 10-8 11-9 16-10 9-12 10-13 12-14",
"1-0",
]
backwards = [
"0-0 1-2",
"0-0 1-1",
"0-0 2-1 3-2 4-3 5-4 6-5 7-6 8-7 9-8 10-10 11-12 12-11 13-13",
"0-0 1-2 2-3 3-4 4-6 6-8 7-5 8-7 9-8",
"0-0 1-8 2-9 3-10 4-11 5-12 6-11 8-13 9-14 10-15 11-16 12-17 13-18 14-19 15-20 16-21 17-22 18-23 19-24 20-29 21-30 22-31 23-2 24-3 25-4 26-5 27-5 28-6 29-7 30-28 31-31",
"0-0 1-1 2-3",
"0-0 1-1 2-3 4-4",
"0-0 1-1 2-3 3-4 5-5 7-6 8-7 9-8 10-9 11-10 12-11 13-12 14-13 15-14 16-16 17-17 18-18 19-19 20-16 21-18",
"0-0 1-1 3-2 4-1 5-3 6-4 7-5 8-6 9-7 10-8 11-9 12-8 13-9 14-8 15-9 16-10",
"1-0",
]
source_lens = [2, 3, 3, 15, 11, 33, 4, 6, 23, 18]
target_lens = [2, 4, 3, 16, 12, 33, 5, 6, 22, 16]
# Expected Output.
expected = [
[(0, 0), (1, 2)],
[(0, 0), (1, 1)],
[
(0, 0),
(2, 1),
(3, 2),
(4, 3),
(5, 4),
(6, 5),
(7, 6),
(8, 7),
(10, 10),
(11, 12),
],
[
(0, 0),
(1, 1),
(1, 2),
(2, 3),
(3, 4),
(4, 5),
(4, 6),
(5, 7),
(6, 8),
(7, 5),
(8, 7),
(8, 9),
(9, 8),
(9, 10),
],
[
(0, 0),
(1, 8),
(2, 9),
(3, 10),
(4, 11),
(5, 8),
(6, 9),
(6, 11),
(7, 10),
(8, 11),
(31, 31),
],
[(0, 0), (0, 2), (1, 1), (2, 3)],
[(0, 0), (1, 1), (2, 2), (2, 3), (4, 4)],
[
(0, 0),
(1, 1),
(2, 3),
(3, 4),
(5, 5),
(7, 6),
(8, 7),
(9, 8),
(10, 9),
(11, 10),
(12, 11),
(13, 12),
(14, 13),
(15, 14),
(16, 16),
(17, 17),
(18, 18),
(19, 19),
],
[
(0, 0),
(1, 1),
(3, 0),
(3, 2),
(4, 1),
(5, 3),
(6, 2),
(6, 4),
(7, 5),
(8, 6),
(9, 7),
(9, 12),
(10, 8),
(10, 13),
(11, 9),
(12, 8),
(12, 14),
(13, 9),
(14, 8),
(15, 9),
(16, 10),
],
[(1, 0)],
[
(0, 0),
(1, 1),
(3, 2),
(4, 3),
(5, 4),
(6, 5),
(7, 6),
(9, 10),
(10, 12),
(11, 13),
(12, 14),
(13, 15),
],
]
# Iterate through all 10 examples and check for expected outputs.
for fw, bw, src_len, trg_len, expect in zip(
forwards, backwards, source_lens, target_lens, expected
):
self.assertListEqual(expect, grow_diag_final_and(src_len, trg_len, fw, bw))
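
# A minimal, hedged usage sketch (not collected by pytest): grow-diag-final-and
# symmetrisation on the first eflomal example above. Alignments are passed as
# whitespace-separated "source-target" index pairs.
if __name__ == "__main__":
    from nltk.translate.gdfa import grow_diag_final_and

    forward = "0-0 1-2"
    backward = "0-0 1-2"
    # Positional arguments: source length, target length, forward alignment,
    # backward alignment (same order as in the test above).
    print(grow_diag_final_and(2, 2, forward, backward))  # [(0, 0), (1, 2)] above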


@@ -0,0 +1,73 @@
"""
Tests for IBM Model 1 training methods
"""
import unittest
from collections import defaultdict
from nltk.translate import AlignedSent, IBMModel, IBMModel1
from nltk.translate.ibm_model import AlignmentInfo
class TestIBMModel1(unittest.TestCase):
def test_set_uniform_translation_probabilities(self):
# arrange
corpus = [
AlignedSent(["ham", "eggs"], ["schinken", "schinken", "eier"]),
AlignedSent(["spam", "spam", "spam", "spam"], ["spam", "spam"]),
]
model1 = IBMModel1(corpus, 0)
# act
model1.set_uniform_probabilities(corpus)
# assert
# expected_prob = 1.0 / (target vocab size + 1)
self.assertEqual(model1.translation_table["ham"]["eier"], 1.0 / 3)
self.assertEqual(model1.translation_table["eggs"][None], 1.0 / 3)
def test_set_uniform_translation_probabilities_of_non_domain_values(self):
# arrange
corpus = [
AlignedSent(["ham", "eggs"], ["schinken", "schinken", "eier"]),
AlignedSent(["spam", "spam", "spam", "spam"], ["spam", "spam"]),
]
model1 = IBMModel1(corpus, 0)
# act
model1.set_uniform_probabilities(corpus)
# assert
# examine target words that are not in the training data domain
self.assertEqual(model1.translation_table["parrot"]["eier"], IBMModel.MIN_PROB)
def test_prob_t_a_given_s(self):
# arrange
src_sentence = ["ich", "esse", "ja", "gern", "räucherschinken"]
trg_sentence = ["i", "love", "to", "eat", "smoked", "ham"]
corpus = [AlignedSent(trg_sentence, src_sentence)]
alignment_info = AlignmentInfo(
(0, 1, 4, 0, 2, 5, 5),
[None] + src_sentence,
["UNUSED"] + trg_sentence,
None,
)
translation_table = defaultdict(lambda: defaultdict(float))
translation_table["i"]["ich"] = 0.98
translation_table["love"]["gern"] = 0.98
translation_table["to"][None] = 0.98
translation_table["eat"]["esse"] = 0.98
translation_table["smoked"]["räucherschinken"] = 0.98
translation_table["ham"]["räucherschinken"] = 0.98
model1 = IBMModel1(corpus, 0)
model1.translation_table = translation_table
# act
probability = model1.prob_t_a_given_s(alignment_info)
# assert
lexical_translation = 0.98 * 0.98 * 0.98 * 0.98 * 0.98 * 0.98
expected_probability = lexical_translation
self.assertEqual(round(probability, 4), round(expected_probability, 4))
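
# A minimal, hedged usage sketch (not collected by pytest): training IBM Model 1
# for a few EM iterations on a tiny illustrative corpus and reading the learned
# lexical translation probabilities. The corpus below is an assumption made for
# demonstration only.
if __name__ == "__main__":
    from nltk.translate import AlignedSent, IBMModel1

    corpus = [
        AlignedSent(["the", "house"], ["das", "haus"]),
        AlignedSent(["the", "book"], ["das", "buch"]),
        AlignedSent(["a", "book"], ["ein", "buch"]),
    ]
    # The second argument is the number of EM iterations (0 in the unit tests above).
    model = IBMModel1(corpus, 5)
    print(round(model.translation_table["book"]["buch"], 3))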

Some files were not shown because too many files have changed in this diff.