updates
11
Backend/venv/lib/python3.12/site-packages/nltk/misc/__init__.py
Normal file
@@ -0,0 +1,11 @@
# Natural Language Toolkit: Miscellaneous modules
#
# Copyright (C) 2001-2025 NLTK Project
# Author: Steven Bird <stevenbird1@gmail.com>
# URL: <https://www.nltk.org/>
# For license information, see LICENSE.TXT

from nltk.misc.babelfish import babelize_shell
from nltk.misc.chomsky import generate_chomsky
from nltk.misc.minimalset import MinimalSet
from nltk.misc.wordfinder import word_finder
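As a usage note (a minimal sketch, assuming an installed nltk with the submodules shown in this commit): the package `__init__` re-exports one entry point per submodule, so consumers can import directly from nltk.misc:

    >>> from nltk.misc import generate_chomsky, word_finder
    >>> generate_chomsky(times=1)  # doctest: +SKIP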
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
10
Backend/venv/lib/python3.12/site-packages/nltk/misc/babelfish.py
Normal file
@@ -0,0 +1,10 @@
"""
This module previously provided an interface to the Babelfish online
translation service. That service is no longer available; this module
is kept in the NLTK source code in order to provide better error
messages for people following the NLTK Book 2.0.
"""


def babelize_shell():
    print("Babelfish online translation service is no longer available.")
134
Backend/venv/lib/python3.12/site-packages/nltk/misc/chomsky.py
Normal file
@@ -0,0 +1,134 @@
# Chomsky random text generator, version 1.1, Raymond Hettinger, 2005/09/13
# https://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/440546

"""
CHOMSKY is an aid to writing linguistic papers in the style
of the great master. It is based on selected phrases taken
from actual books and articles written by Noam Chomsky.
Upon request, it assembles the phrases in the elegant
stylistic patterns that Chomsky is noted for.
To generate n sentences of linguistic wisdom, type

    (CHOMSKY n) -- for example
    (CHOMSKY 5) generates half a screen of linguistic truth.
"""

leadins = """To characterize a linguistic level L,
On the other hand,
This suggests that
It appears that
Furthermore,
We will bring evidence in favor of the following thesis:
To provide a constituent structure for T(Z,K),
From C1, it follows that
For any transformation which is sufficiently diversified in \
application to be of any interest,
Analogously,
Clearly,
Note that
Of course,
Suppose, for instance, that
Thus
With this clarification,
Conversely,
We have already seen that
By combining adjunctions and certain deformations,
I suggested that these results would follow from the assumption that
If the position of the trace in (99c) were only relatively \
inaccessible to movement,
However, this assumption is not correct, since
Comparing these examples with their parasitic gap counterparts in \
(96) and (97), we see that
In the discussion of resumptive pronouns following (81),
So far,
Nevertheless,
For one thing,
Summarizing, then, we assume that
A consequence of the approach just outlined is that
Presumably,
On our assumptions,
It may be, then, that
It must be emphasized, once again, that
Let us continue to suppose that
Notice, incidentally, that """
# List of LEADINs to buy time.

subjects = """ the notion of level of grammaticalness
a case of semigrammaticalness of a different sort
most of the methodological work in modern linguistics
a subset of English sentences interesting on quite independent grounds
the natural general principle that will subsume this case
an important property of these three types of EC
any associated supporting element
the appearance of parasitic gaps in domains relatively inaccessible \
to ordinary extraction
the speaker-hearer's linguistic intuition
the descriptive power of the base component
the earlier discussion of deviance
this analysis of a formative as a pair of sets of features
this selectionally introduced contextual feature
a descriptively adequate grammar
the fundamental error of regarding functional notions as categorial
relational information
the systematic use of complex symbols
the theory of syntactic features developed earlier"""
# List of SUBJECTs chosen for maximum professorial macho.

verbs = """can be defined in such a way as to impose
delimits
suffices to account for
cannot be arbitrary in
is not subject to
does not readily tolerate
raises serious doubts about
is not quite equivalent to
does not affect the structure of
may remedy and, at the same time, eliminate
is not to be considered in determining
is to be regarded as
is unspecified with respect to
is, apparently, determined by
is necessary to impose an interpretation on
appears to correlate rather closely with
is rather different from"""
# List of VERBs chosen for autorecursive obfuscation.

objects = """ problems of phonemic and morphological analysis.
a corpus of utterance tokens upon which conformity has been defined \
by the paired utterance test.
the traditional practice of grammarians.
the levels of acceptability from fairly high (e.g. (99a)) to virtual \
gibberish (e.g. (98d)).
a stipulation to place the constructions into these various categories.
a descriptive fact.
a parasitic gap construction.
the extended c-command discussed in connection with (34).
the ultimate standard that determines the accuracy of any proposed grammar.
the system of base rules exclusive of the lexicon.
irrelevant intervening contexts in selectional rules.
nondistinctness in the sense of distinctive feature theory.
a general convention regarding the forms of the grammar.
an abstract underlying order.
an important distinction in language use.
the requirement that branching is not tolerated within the dominance \
scope of a complex symbol.
the strong generative capacity of the theory."""
# List of OBJECTs selected for profound sententiousness.

import random
import textwrap
from itertools import chain, islice


def generate_chomsky(times=5, line_length=72):
    parts = []
    for part in (leadins, subjects, verbs, objects):
        phraselist = list(map(str.strip, part.splitlines()))
        random.shuffle(phraselist)
        parts.append(phraselist)
    output = chain.from_iterable(islice(zip(*parts), 0, times))
    print(textwrap.fill(" ".join(output), line_length))


if __name__ == "__main__":
    generate_chomsky()
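For reference, a possible interactive use of this module: each sentence is one shuffled leadin + subject + verb + object drawn from the lists above, so the output is randomized and the sentence shown below is just one possible combination:

    >>> from nltk.misc.chomsky import generate_chomsky
    >>> generate_chomsky(times=1, line_length=72)  # doctest: +SKIP
    On the other hand, the descriptive power of the base component
    delimits a descriptive fact.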
85
Backend/venv/lib/python3.12/site-packages/nltk/misc/minimalset.py
Normal file
@@ -0,0 +1,85 @@
# Natural Language Toolkit: Minimal Sets
#
# Copyright (C) 2001-2025 NLTK Project
# Author: Steven Bird <stevenbird1@gmail.com>
# URL: <https://www.nltk.org/>
# For license information, see LICENSE.TXT

from collections import defaultdict


class MinimalSet:
    """
    Find contexts where more than one possible target value can
    appear. E.g. if targets are word-initial letters, and contexts
    are the remainders of words, then we would like to find cases like
    "fat" vs "cat", and "training" vs "draining". If targets are
    parts-of-speech and contexts are words, then we would like to find
    cases like wind (noun) 'air in rapid motion', vs wind (verb)
    'coil, wrap'.
    """

    def __init__(self, parameters=None):
        """
        Create a new minimal set.

        :param parameters: The (context, target, display) tuples for the item
        :type parameters: list(tuple(str, str, str))
        """
        self._targets = set()  # the contrastive information
        self._contexts = set()  # what we are controlling for
        self._seen = defaultdict(set)  # to record what we have seen
        self._displays = {}  # what we will display

        if parameters:
            for context, target, display in parameters:
                self.add(context, target, display)

    def add(self, context, target, display):
        """
        Add a new item to the minimal set, having the specified
        context, target, and display form.

        :param context: The context in which the item of interest appears
        :type context: str
        :param target: The item of interest
        :type target: str
        :param display: The information to be reported for each item
        :type display: str
        """
        # Store the set of targets that occurred in this context
        self._seen[context].add(target)

        # Keep track of which contexts and targets we have seen
        self._contexts.add(context)
        self._targets.add(target)

        # For a given context and target, store the display form
        self._displays[(context, target)] = display

    def contexts(self, minimum=2):
        """
        Determine which contexts occurred with enough distinct targets.

        :param minimum: the minimum number of distinct target forms
        :type minimum: int
        :rtype: list
        """
        return [c for c in self._contexts if len(self._seen[c]) >= minimum]

    def display(self, context, target, default=""):
        if (context, target) in self._displays:
            return self._displays[(context, target)]
        else:
            return default

    def display_all(self, context):
        result = []
        for target in self._targets:
            x = self.display(context, target)
            if x:
                result.append(x)
        return result

    def targets(self):
        return self._targets
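A short usage sketch (hypothetical data, following the class docstring: targets are word-initial letters, contexts are the word remainders; the sorted() calls are only there because sets iterate in arbitrary order):

    >>> from nltk.misc.minimalset import MinimalSet
    >>> words = ["fat", "cat", "training", "draining", "zebra"]
    >>> ms = MinimalSet((w[1:], w[0], w) for w in words)
    >>> sorted(sorted(ms.display_all(c)) for c in ms.contexts())
    [['cat', 'fat'], ['draining', 'training']]

Note that "zebra" is dropped: its context "ebra" occurred with only one target, so it never forms a minimal pair.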
176
Backend/venv/lib/python3.12/site-packages/nltk/misc/sort.py
Normal file
@@ -0,0 +1,176 @@
# Natural Language Toolkit: List Sorting
#
# Copyright (C) 2001-2025 NLTK Project
# Author: Steven Bird <stevenbird1@gmail.com>
# URL: <https://www.nltk.org/>
# For license information, see LICENSE.TXT

"""
This module provides a variety of list sorting algorithms, to
illustrate the many different algorithms (recipes) for solving a
problem, and how to analyze algorithms experimentally.
"""
# These algorithms are taken from:
# Levitin (2004) The Design and Analysis of Algorithms

##################################################################
# Selection Sort
##################################################################


def selection(a):
    """
    Selection Sort: scan the list to find its smallest element, then
    swap it with the first element. The remainder of the list is one
    element smaller; apply the same method to this list, and so on.
    """
    count = 0

    for i in range(len(a) - 1):
        min = i

        for j in range(i + 1, len(a)):
            if a[j] < a[min]:
                min = j

            count += 1

        a[min], a[i] = a[i], a[min]

    return count


##################################################################
# Bubble Sort
##################################################################


def bubble(a):
    """
    Bubble Sort: compare adjacent elements of the list left-to-right,
    and swap them if they are out of order. After one pass through
    the list swapping adjacent items, the largest item will be in
    the rightmost position. The remainder is one element smaller;
    apply the same method to this list, and so on.
    """
    count = 0
    for i in range(len(a) - 1):
        for j in range(len(a) - i - 1):
            if a[j + 1] < a[j]:
                a[j], a[j + 1] = a[j + 1], a[j]
                count += 1
    return count


##################################################################
# Merge Sort
##################################################################


def _merge_lists(b, c):
    count = 0
    i = j = 0
    a = []
    while i < len(b) and j < len(c):
        count += 1
        if b[i] <= c[j]:
            a.append(b[i])
            i += 1
        else:
            a.append(c[j])
            j += 1
    if i == len(b):
        a += c[j:]
    else:
        a += b[i:]
    return a, count


def merge(a):
    """
    Merge Sort: split the list in half, and sort each half, then
    combine the sorted halves.
    """
    count = 0
    if len(a) > 1:
        midpoint = len(a) // 2
        b = a[:midpoint]
        c = a[midpoint:]
        count_b = merge(b)
        count_c = merge(c)
        result, count_a = _merge_lists(b, c)
        a[:] = result  # copy the result back into a.
        count = count_a + count_b + count_c
    return count


##################################################################
# Quick Sort
##################################################################


def _partition(a, l, r):
    p = a[l]
    i = l
    j = r + 1
    count = 0
    while True:
        while i < r:
            i += 1
            if a[i] >= p:
                break
        while j > l:
            j -= 1
            if j < l or a[j] <= p:
                break
        a[i], a[j] = a[j], a[i]  # swap
        count += 1
        if i >= j:
            break
    a[i], a[j] = a[j], a[i]  # undo last swap
    a[l], a[j] = a[j], a[l]
    return j, count


def _quick(a, l, r):
    count = 0
    if l < r:
        s, count = _partition(a, l, r)
        count += _quick(a, l, s - 1)
        count += _quick(a, s + 1, r)
    return count


def quick(a):
    return _quick(a, 0, len(a) - 1)


##################################################################
# Demonstration
##################################################################


def demo():
    from random import shuffle

    for size in (10, 20, 50, 100, 200, 500, 1000):
        a = list(range(size))

        # various sort methods
        shuffle(a)
        count_selection = selection(a)
        shuffle(a)
        count_bubble = bubble(a)
        shuffle(a)
        count_merge = merge(a)
        shuffle(a)
        count_quick = quick(a)

        print(
            ("size=%5d: selection=%8d, bubble=%8d, " "merge=%6d, quick=%6d")
            % (size, count_selection, count_bubble, count_merge, count_quick)
        )


if __name__ == "__main__":
    demo()
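A quick sanity check (hypothetical, not part of the module): selection() always performs n*(n-1)/2 comparisons regardless of input order, so its count is deterministic, and the list is sorted in place:

    >>> from nltk.misc.sort import selection
    >>> a = [5, 1, 4, 2, 3]
    >>> selection(a)
    10
    >>> a
    [1, 2, 3, 4, 5]

The counts returned by merge() and quick() depend on the data, which is exactly what demo() exploits to compare the algorithms experimentally.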
139
Backend/venv/lib/python3.12/site-packages/nltk/misc/wordfinder.py
Normal file
@@ -0,0 +1,139 @@
# Natural Language Toolkit: Word Finder
#
# Copyright (C) 2001-2025 NLTK Project
# Author: Steven Bird <stevenbird1@gmail.com>
# URL: <https://www.nltk.org/>
# For license information, see LICENSE.TXT

# Simplified from PHP version by Robert Klein <brathna@gmail.com>
# http://fswordfinder.sourceforge.net/

import random


# reverse a word with probability 0.5
def revword(word):
    if random.randint(1, 2) == 1:
        return word[::-1]
    return word


# try to insert word at position x,y; direction encoded in xf,yf
def step(word, x, xf, y, yf, grid):
    for i in range(len(word)):
        if grid[xf(i)][yf(i)] != "" and grid[xf(i)][yf(i)] != word[i]:
            return False
    for i in range(len(word)):
        grid[xf(i)][yf(i)] = word[i]
    return True


# try to insert word at position x,y, in direction dir
def check(word, dir, x, y, grid, rows, cols):
    if dir == 1:  # diagonal: up and to the left
        if x - len(word) < 0 or y - len(word) < 0:
            return False
        return step(word, x, lambda i: x - i, y, lambda i: y - i, grid)
    elif dir == 2:  # vertical: upwards
        if x - len(word) < 0:
            return False
        return step(word, x, lambda i: x - i, y, lambda i: y, grid)
    elif dir == 3:  # diagonal: up and to the right
        if x - len(word) < 0 or y + (len(word) - 1) >= cols:
            return False
        return step(word, x, lambda i: x - i, y, lambda i: y + i, grid)
    elif dir == 4:  # horizontal: to the left
        if y - len(word) < 0:
            return False
        return step(word, x, lambda i: x, y, lambda i: y - i, grid)


def wordfinder(words, rows=20, cols=20, attempts=50, alph="ABCDEFGHIJKLMNOPQRSTUVWXYZ"):
    """
    Attempt to arrange words into a letter-grid with the specified
    number of rows and columns. Try each word in several positions
    and directions, until it can be fitted into the grid, or the
    maximum number of allowable attempts is exceeded. Returns a tuple
    consisting of the grid and the words that were successfully
    placed.

    :param words: the list of words to be put into the grid
    :type words: list
    :param rows: the number of rows in the grid
    :type rows: int
    :param cols: the number of columns in the grid
    :type cols: int
    :param attempts: the number of times to attempt placing a word
    :type attempts: int
    :param alph: the alphabet, to be used for filling blank cells
    :type alph: list
    :rtype: tuple
    """

    # place longer words first
    words = sorted(words, key=len, reverse=True)

    grid = []  # the letter grid
    used = []  # the words we used

    # initialize the grid
    for i in range(rows):
        grid.append([""] * cols)

    # try to place each word
    for word in words:
        word = word.strip().upper()  # normalize
        save = word  # keep a record of the word
        word = revword(word)
        for attempt in range(attempts):
            r = random.randint(0, len(word))
            dir = random.choice([1, 2, 3, 4])
            x = random.randint(0, rows)
            y = random.randint(0, cols)
            if dir == 1:
                x += r
                y += r
            elif dir == 2:
                x += r
            elif dir == 3:
                x += r
                y -= r
            elif dir == 4:
                y += r
            if 0 <= x < rows and 0 <= y < cols:
                if check(word, dir, x, y, grid, rows, cols):
                    # used.append((save, dir, x, y, word))
                    used.append(save)
                    break

    # Fill up the remaining spaces
    for i in range(rows):
        for j in range(cols):
            if grid[i][j] == "":
                grid[i][j] = random.choice(alph)

    return grid, used


def word_finder():
    from nltk.corpus import words

    wordlist = words.words()
    random.shuffle(wordlist)
    wordlist = wordlist[:200]
    wordlist = [w for w in wordlist if 3 <= len(w) <= 12]
    grid, used = wordfinder(wordlist)

    print("Word Finder\n")
    for i in range(len(grid)):
        for j in range(len(grid[i])):
            print(grid[i][j], end=" ")
        print()
    print()

    for i in range(len(used)):
        print("%d:" % (i + 1), used[i])


if __name__ == "__main__":
    word_finder()
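A small usage sketch (hypothetical word list; the grid fill is random, and a word may be dropped from used if it cannot be placed within the allotted attempts, hence the skipped line):

    >>> from nltk.misc.wordfinder import wordfinder
    >>> grid, used = wordfinder(["apple", "banana", "cherry"], rows=10, cols=10)
    >>> len(grid), len(grid[0])
    (10, 10)
    >>> sorted(used)  # doctest: +SKIP
    ['APPLE', 'BANANA', 'CHERRY']

Since check() only places words upwards or leftwards and revword() reverses each word with probability 0.5, all eight directions are effectively covered.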