updates
This commit is contained in:
@@ -0,0 +1,47 @@
|
||||
# Natural Language Toolkit: Applications package
|
||||
#
|
||||
# Copyright (C) 2001-2025 NLTK Project
|
||||
# Author: Edward Loper <edloper@gmail.com>
|
||||
# Steven Bird <stevenbird1@gmail.com>
|
||||
# URL: <https://www.nltk.org/>
|
||||
# For license information, see LICENSE.TXT
|
||||
|
||||
"""
|
||||
Interactive NLTK Applications:
|
||||
|
||||
chartparser: Chart Parser
|
||||
chunkparser: Regular-Expression Chunk Parser
|
||||
collocations: Find collocations in text
|
||||
concordance: Part-of-speech concordancer
|
||||
nemo: Finding (and Replacing) Nemo regular expression tool
|
||||
rdparser: Recursive Descent Parser
|
||||
srparser: Shift-Reduce Parser
|
||||
wordnet: WordNet Browser
|
||||
"""
|
||||
|
||||
|
||||
# Import Tkinter-based modules if Tkinter is installed
|
||||
try:
|
||||
import tkinter
|
||||
except ImportError:
|
||||
import warnings
|
||||
|
||||
warnings.warn("nltk.app package not loaded (please install Tkinter library).")
|
||||
else:
|
||||
from nltk.app.chartparser_app import app as chartparser
|
||||
from nltk.app.chunkparser_app import app as chunkparser
|
||||
from nltk.app.collocations_app import app as collocations
|
||||
from nltk.app.concordance_app import app as concordance
|
||||
from nltk.app.nemo_app import app as nemo
|
||||
from nltk.app.rdparser_app import app as rdparser
|
||||
from nltk.app.srparser_app import app as srparser
|
||||
from nltk.app.wordnet_app import app as wordnet
|
||||
|
||||
try:
|
||||
from matplotlib import pylab
|
||||
except ImportError:
|
||||
import warnings
|
||||
|
||||
warnings.warn("nltk.app.wordfreq not loaded (requires the matplotlib library).")
|
||||
else:
|
||||
from nltk.app.wordfreq_app import app as wordfreq
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,438 @@
|
||||
# Natural Language Toolkit: Collocations Application
|
||||
# Much of the GUI code is copied from concordance_app.py; we intend to merge these tools together.
|
||||
# Copyright (C) 2001-2025 NLTK Project
|
||||
# Author: Sumukh Ghodke <sghodke@csse.unimelb.edu.au>
|
||||
# URL: <https://www.nltk.org/>
|
||||
# For license information, see LICENSE.TXT
|
||||
#
|
||||
|
||||
|
||||
import queue as q
|
||||
import threading
|
||||
from tkinter import (
|
||||
END,
|
||||
LEFT,
|
||||
SUNKEN,
|
||||
Button,
|
||||
Frame,
|
||||
IntVar,
|
||||
Label,
|
||||
Menu,
|
||||
OptionMenu,
|
||||
Scrollbar,
|
||||
StringVar,
|
||||
Text,
|
||||
Tk,
|
||||
)
|
||||
from tkinter.font import Font
|
||||
|
||||
from nltk.corpus import (
|
||||
alpino,
|
||||
brown,
|
||||
cess_cat,
|
||||
cess_esp,
|
||||
floresta,
|
||||
indian,
|
||||
mac_morpho,
|
||||
machado,
|
||||
nps_chat,
|
||||
sinica_treebank,
|
||||
treebank,
|
||||
)
|
||||
from nltk.probability import FreqDist
|
||||
from nltk.util import in_idle
|
||||
|
||||
# Event tokens that the corpus-loading thread puts on the queue shared with
# the view; the view's _poll method dispatches on them.
CORPUS_LOADED_EVENT = "<<CL_EVENT>>"
ERROR_LOADING_CORPUS_EVENT = "<<ELC_EVENT>>"
# Delay handed to Tk's ``after`` between two polls of the event queue.
POLL_INTERVAL = 100

# Corpus selected at start-up; it must be one of the keys of _CORPORA.
_DEFAULT = "English: Brown Corpus (Humor)"
# Maps the label shown in the corpus menu to a zero-argument callable that
# returns the words of that corpus.  The lambdas postpone any corpus access
# until the entry is actually selected.
_CORPORA = {
    "Catalan: CESS-CAT Corpus": lambda: cess_cat.words(),
    "English: Brown Corpus": lambda: brown.words(),
    "English: Brown Corpus (Press)": lambda: brown.words(
        categories=["news", "editorial", "reviews"]
    ),
    "English: Brown Corpus (Religion)": lambda: brown.words(categories="religion"),
    "English: Brown Corpus (Learned)": lambda: brown.words(categories="learned"),
    "English: Brown Corpus (Science Fiction)": lambda: brown.words(
        categories="science_fiction"
    ),
    "English: Brown Corpus (Romance)": lambda: brown.words(categories="romance"),
    "English: Brown Corpus (Humor)": lambda: brown.words(categories="humor"),
    "English: NPS Chat Corpus": lambda: nps_chat.words(),
    "English: Wall Street Journal Corpus": lambda: treebank.words(),
    "Chinese: Sinica Corpus": lambda: sinica_treebank.words(),
    "Dutch: Alpino Corpus": lambda: alpino.words(),
    "Hindi: Indian Languages Corpus": lambda: indian.words(files="hindi.pos"),
    "Portuguese: Floresta Corpus (Portugal)": lambda: floresta.words(),
    "Portuguese: MAC-MORPHO Corpus (Brazil)": lambda: mac_morpho.words(),
    "Portuguese: Machado Corpus (Brazil)": lambda: machado.words(),
    "Spanish: CESS-ESP Corpus": lambda: cess_esp.words(),
}
|
||||
|
||||
|
||||
class CollocationsView:
    """Tk window that pages through the collocations of a selectable corpus.

    The view owns a ``CollocationsModel`` and a queue shared with it.  The
    model loads corpora on a worker thread and reports completion through the
    queue; the view polls that queue from the Tk event loop (``_poll``) so
    that all widget updates happen on the GUI thread.
    """

    _BACKGROUND_COLOUR = "#FFF"  # white

    def __init__(self):
        """Build the window, start loading the default corpus and begin polling."""
        self.queue = q.Queue()
        self.model = CollocationsModel(self.queue)
        self.top = Tk()
        self._init_top(self.top)
        self._init_menubar()
        self._init_widgets(self.top)
        self.load_corpus(self.model.DEFAULT_CORPUS)
        # Handle of the pending Tk timer; destroy() cancels it.
        self.after = self.top.after(POLL_INTERVAL, self._poll)

    def _init_top(self, top):
        """Configure geometry, title and close handling of the top-level window."""
        top.geometry("550x650+50+50")
        top.title("NLTK Collocations List")
        top.bind("<Control-q>", self.destroy)
        top.protocol("WM_DELETE_WINDOW", self.destroy)
        top.minsize(550, 650)

    def _init_widgets(self, parent):
        """Create the main frame and all widget groups inside it."""
        self.main_frame = Frame(
            parent, dict(background=self._BACKGROUND_COLOUR, padx=1, pady=1, border=1)
        )
        self._init_corpus_select(self.main_frame)
        self._init_results_box(self.main_frame)
        self._init_paging(self.main_frame)
        self._init_status(self.main_frame)
        self.main_frame.pack(fill="both", expand=True)

    def _init_corpus_select(self, parent):
        """Create the "Corpus:" label and the drop-down listing every corpus."""
        innerframe = Frame(parent, background=self._BACKGROUND_COLOUR)
        self.var = StringVar(innerframe)
        self.var.set(self.model.DEFAULT_CORPUS)
        Label(
            innerframe,
            justify=LEFT,
            text=" Corpus: ",
            background=self._BACKGROUND_COLOUR,
            padx=2,
            pady=1,
            border=0,
        ).pack(side="left")

        # The default corpus is listed first, followed by the remaining
        # corpora as returned by the model.
        om = OptionMenu(
            innerframe,
            self.var,
            self.model.DEFAULT_CORPUS,
            *self.model.non_default_corpora(),
            command=self.corpus_selected,
        )
        om["borderwidth"] = 0
        om["highlightthickness"] = 1
        om.pack(side="left")
        innerframe.pack(side="top", fill="x", anchor="n")

    def _init_status(self, parent):
        """Create the status label shown below the paging buttons."""
        self.status = Label(
            parent,
            justify=LEFT,
            relief=SUNKEN,
            background=self._BACKGROUND_COLOUR,
            border=0,
            padx=1,
            pady=0,
        )
        self.status.pack(side="top", anchor="sw")

    def _init_menubar(self):
        """Create the File and Edit menus and apply the default result count."""
        self._result_size = IntVar(self.top)
        menubar = Menu(self.top)

        filemenu = Menu(menubar, tearoff=0, borderwidth=0)
        filemenu.add_command(
            label="Exit", underline=1, command=self.destroy, accelerator="Ctrl-q"
        )
        menubar.add_cascade(label="File", underline=0, menu=filemenu)

        editmenu = Menu(menubar, tearoff=0)
        rescntmenu = Menu(editmenu, tearoff=0)
        for size in (20, 50, 100):
            rescntmenu.add_radiobutton(
                label=str(size),
                variable=self._result_size,
                underline=0,
                value=size,
                command=self.set_result_size,
            )
        # Invoking entry 1 selects "50" and runs set_result_size, which
        # initialises the model's result_count.
        rescntmenu.invoke(1)
        editmenu.add_cascade(label="Result Count", underline=0, menu=rescntmenu)

        menubar.add_cascade(label="Edit", underline=0, menu=editmenu)
        self.top.config(menu=menubar)

    def set_result_size(self, **kwargs):
        """Copy the result count chosen in the menu to the model."""
        self.model.result_count = self._result_size.get()

    def _init_results_box(self, parent):
        """Create the read-only text box with its two scrollbars."""
        innerframe = Frame(parent)
        i1 = Frame(innerframe)
        i2 = Frame(innerframe)
        vscrollbar = Scrollbar(i1, borderwidth=1)
        hscrollbar = Scrollbar(i2, borderwidth=1, orient="horiz")
        self.results_box = Text(
            i1,
            font=Font(family="courier", size="16"),
            state="disabled",
            borderwidth=1,
            yscrollcommand=vscrollbar.set,
            xscrollcommand=hscrollbar.set,
            wrap="none",
            width="40",
            height="20",
            exportselection=1,
        )
        self.results_box.pack(side="left", fill="both", expand=True)
        vscrollbar.pack(side="left", fill="y", anchor="e")
        vscrollbar.config(command=self.results_box.yview)
        hscrollbar.pack(side="left", fill="x", expand=True, anchor="w")
        hscrollbar.config(command=self.results_box.xview)
        # there is no other way of avoiding the overlap of scrollbars while using pack layout manager!!!
        Label(i2, text="   ", background=self._BACKGROUND_COLOUR).pack(
            side="left", anchor="e"
        )
        i1.pack(side="top", fill="both", expand=True, anchor="n")
        i2.pack(side="bottom", fill="x", anchor="s")
        innerframe.pack(side="top", fill="both", expand=True)

    def _init_paging(self, parent):
        """Create the Previous/Next buttons (initially disabled)."""
        innerframe = Frame(parent, background=self._BACKGROUND_COLOUR)
        self.prev = prev = Button(
            innerframe,
            text="Previous",
            command=self.previous,
            width="10",
            borderwidth=1,
            highlightthickness=1,
            state="disabled",
        )
        prev.pack(side="left", anchor="center")
        self.next = next_button = Button(
            innerframe,
            text="Next",
            command=self.__next__,
            width="10",
            borderwidth=1,
            highlightthickness=1,
            state="disabled",
        )
        next_button.pack(side="right", anchor="center")
        innerframe.pack(side="top", fill="y")
        self.reset_current_page()

    def reset_current_page(self):
        """Mark that no page is displayed (page index -1)."""
        self.current_page = -1

    def _poll(self):
        """Handle at most one queued model event, then reschedule itself."""
        try:
            event = self.queue.get(block=False)
        except q.Empty:
            pass
        else:
            if event == CORPUS_LOADED_EVENT:
                self.handle_corpus_loaded(event)
            elif event == ERROR_LOADING_CORPUS_EVENT:
                self.handle_error_loading_corpus(event)
        self.after = self.top.after(POLL_INTERVAL, self._poll)

    def handle_error_loading_corpus(self, event):
        """Report a failed corpus load and leave the paging buttons disabled."""
        self.status["text"] = "Error in loading " + self.var.get()
        self.unfreeze_editable()
        self.clear_results_box()
        self.freeze_editable()
        self.reset_current_page()

    def handle_corpus_loaded(self, event):
        """Show the first page of collocations of the corpus that just loaded."""
        self.status["text"] = self.var.get() + " is loaded"
        self.clear_results_box()
        self.reset_current_page()
        collocations = self.model.next(self.current_page + 1)
        self.write_results(collocations)
        self.current_page += 1
        # Update the paging buttons only now that current_page refers to the
        # new corpus; doing it before the reset used the page index left over
        # from the previous corpus and could leave "Previous" enabled on the
        # first page.
        self.unfreeze_editable()

    def corpus_selected(self, *args):
        """Callback of the corpus drop-down: load the newly selected corpus."""
        new_selection = self.var.get()
        self.load_corpus(new_selection)

    def previous(self):
        """Display the page before the current one."""
        self.freeze_editable()
        collocations = self.model.prev(self.current_page - 1)
        self.current_page = self.current_page - 1
        self.clear_results_box()
        self.write_results(collocations)
        self.unfreeze_editable()

    def __next__(self):
        """Display the page after the current one."""
        self.freeze_editable()
        collocations = self.model.next(self.current_page + 1)
        self.clear_results_box()
        self.write_results(collocations)
        self.current_page += 1
        self.unfreeze_editable()

    def load_corpus(self, selection):
        """Ask the model to load ``selection`` unless it is already selected."""
        if self.model.selected_corpus != selection:
            self.status["text"] = "Loading " + selection + "..."
            self.freeze_editable()
            self.model.load_corpus(selection)

    def freeze_editable(self):
        """Disable both paging buttons."""
        self.prev["state"] = "disabled"
        self.next["state"] = "disabled"

    def clear_results_box(self):
        """Remove all text from the results box, leaving it read-only."""
        self.results_box["state"] = "normal"
        self.results_box.delete("1.0", END)
        self.results_box["state"] = "disabled"

    def fire_event(self, event):
        """Generate a Tk virtual event on the top-level window."""
        # Firing an event so that rendering of widgets happen in the mainloop thread
        self.top.event_generate(event, when="tail")

    def destroy(self, *e):
        """Cancel the pending poll and close the window; safe to call twice."""
        if self.top is None:
            return
        self.top.after_cancel(self.after)
        self.top.destroy()
        self.top = None

    def mainloop(self, *args, **kwargs):
        """Run the Tk main loop, unless ``in_idle()`` reports we run inside IDLE."""
        if in_idle():
            return
        self.top.mainloop(*args, **kwargs)

    def unfreeze_editable(self):
        """Re-enable whichever paging buttons apply to the current page."""
        self.set_paging_button_states()

    def set_paging_button_states(self):
        """Enable or disable Previous/Next according to the current page."""
        if self.current_page == -1 or self.current_page == 0:
            self.prev["state"] = "disabled"
        else:
            self.prev["state"] = "normal"
        if self.model.is_last_page(self.current_page):
            self.next["state"] = "disabled"
        else:
            self.next["state"] = "normal"

    def write_results(self, results):
        """Write one "word1 word2" line per collocation into the results box."""
        self.results_box["state"] = "normal"
        for row, each in enumerate(results, start=1):
            self.results_box.insert(str(row) + ".0", each[0] + " " + each[1] + "\n")
        self.results_box["state"] = "disabled"
|
||||
|
||||
|
||||
class CollocationsModel:
    """Holds the collocations of the selected corpus and serves them in pages.

    Corpora are loaded on a ``LoadCorpus`` worker thread, which stores the
    scored collocation list on the model and reports the outcome by putting
    an event token on ``queue``.
    """

    def __init__(self, queue):
        """Create an empty model.

        :param queue: queue on which load events are posted for the view.
        """
        self.result_count = None  # page size; set by the view's menu
        self.selected_corpus = None
        self.collocations = None  # list of (w1, w2) once a corpus is loaded
        self.CORPORA = _CORPORA
        self.DEFAULT_CORPUS = _DEFAULT
        self.queue = queue
        self.reset_results()

    def reset_results(self):
        """Forget every page handed out so far."""
        self.result_pages = []
        self.results_returned = 0

    def load_corpus(self, name):
        """Start loading corpus ``name`` in the background and reset paging."""
        self.selected_corpus = name
        self.collocations = None
        runner_thread = self.LoadCorpus(name, self)
        runner_thread.start()
        self.reset_results()

    def non_default_corpora(self):
        """Return the sorted corpus names, excluding the default corpus.

        :raises ValueError: if the default corpus is not a known corpus.
        """
        names = list(self.CORPORA)
        names.remove(self.DEFAULT_CORPUS)
        names.sort()
        return names

    def is_last_page(self, number):
        """Return True if no collocation remains after page ``number``.

        ``number`` may be -1 (no page shown yet), the index of a page that
        was already fetched, or the index of a page not fetched yet.  When
        no corpus is loaded there is nothing to page through, so True is
        returned.
        """
        if self.collocations is None:
            # Still loading, or loading failed: avoid len(None).
            return True
        fetched = len(self.result_pages)
        if number < fetched:
            # Collocations covered by the pages up to and including `number`.
            shown = sum(len(page) for page in self.result_pages[: max(number + 1, 0)])
        else:
            # Offset reached once page `number` would have been fetched.
            shown = self.results_returned + (number - fetched + 1) * self.result_count
        return shown >= len(self.collocations)

    def next(self, page):
        """Return page ``page``, slicing any pages not fetched yet."""
        missing = page - (len(self.result_pages) - 1)
        for _ in range(missing):
            start = self.results_returned
            self.result_pages.append(
                self.collocations[start : start + self.result_count]
            )
            self.results_returned += self.result_count
        return self.result_pages[page]

    def prev(self, page):
        """Return the already fetched page ``page`` ([] for page -1)."""
        if page == -1:
            return []
        return self.result_pages[page]

    class LoadCorpus(threading.Thread):
        """Worker thread that loads one corpus and scores its bigrams."""

        def __init__(self, name, model):
            """Remember the corpus ``name`` and the ``model`` to fill in."""
            threading.Thread.__init__(self)
            self.model, self.name = model, name

        def run(self):
            """Compute the collocations and post a success or error event."""
            try:
                words = self.model.CORPORA[self.name]()
                from operator import itemgetter

                # Only words longer than two characters take part.
                text = [w for w in words if len(w) > 2]
                fd = FreqDist(zip(text, text[1:]))
                vocab = FreqDist(text)
                # Score: bigram count cubed over the product of word counts.
                scored = [
                    ((w1, w2), fd[(w1, w2)] ** 3 / (vocab[w1] * vocab[w2]))
                    for w1, w2 in fd
                ]
                scored.sort(key=itemgetter(1), reverse=True)
                self.model.collocations = [pair for pair, _ in scored]
                self.model.queue.put(CORPUS_LOADED_EVENT)
            except Exception as e:
                # Any failure is reported to the view instead of killing the
                # thread silently.
                print(e)
                self.model.queue.put(ERROR_LOADING_CORPUS_EVENT)
|
||||
|
||||
|
||||
# def collocations():
|
||||
# colloc_strings = [w1 + ' ' + w2 for w1, w2 in self._collocations[:num]]
|
||||
|
||||
|
||||
def app():
    """Open the collocations window and run it until it is closed."""
    CollocationsView().mainloop()
|
||||
|
||||
|
||||
# Launch the application when this module is executed as a script.
if __name__ == "__main__":
    app()

# Only the application entry point is part of the public API.
__all__ = ["app"]
|
||||
@@ -0,0 +1,709 @@
|
||||
# Natural Language Toolkit: Concordance Application
|
||||
#
|
||||
# Copyright (C) 2001-2025 NLTK Project
|
||||
# Author: Sumukh Ghodke <sghodke@csse.unimelb.edu.au>
|
||||
# URL: <https://www.nltk.org/>
|
||||
# For license information, see LICENSE.TXT
|
||||
|
||||
import queue as q
|
||||
import re
|
||||
import threading
|
||||
from tkinter import (
|
||||
END,
|
||||
LEFT,
|
||||
SUNKEN,
|
||||
Button,
|
||||
Entry,
|
||||
Frame,
|
||||
IntVar,
|
||||
Label,
|
||||
Menu,
|
||||
OptionMenu,
|
||||
Scrollbar,
|
||||
StringVar,
|
||||
Text,
|
||||
Tk,
|
||||
)
|
||||
from tkinter.font import Font
|
||||
|
||||
from nltk.corpus import (
|
||||
alpino,
|
||||
brown,
|
||||
cess_cat,
|
||||
cess_esp,
|
||||
floresta,
|
||||
indian,
|
||||
mac_morpho,
|
||||
nps_chat,
|
||||
sinica_treebank,
|
||||
treebank,
|
||||
)
|
||||
from nltk.draw.util import ShowText
|
||||
from nltk.util import in_idle
|
||||
|
||||
# Regex fragment: one or more characters other than "/" and space.
WORD_OR_TAG = "[^/ ]+"
# Regex word-boundary anchor.
BOUNDARY = r"\b"

# Event tokens that the view's _poll method reads from its queue and
# dispatches to the matching handler.
CORPUS_LOADED_EVENT = "<<CL_EVENT>>"
SEARCH_TERMINATED_EVENT = "<<ST_EVENT>>"
SEARCH_ERROR_EVENT = "<<SE_EVENT>>"
ERROR_LOADING_CORPUS_EVENT = "<<ELC_EVENT>>"

# Delay handed to Tk's ``after`` between two polls of the event queue.
POLL_INTERVAL = 50

# NB All corpora must be specified in a lambda expression so as not to be
# loaded when the module is imported.

# Corpus selected at start-up.
_DEFAULT = "English: Brown Corpus (Humor, simplified)"
# Maps the label shown in the corpus menu to a zero-argument callable that
# returns the tagged sentences (or tagged posts) of that corpus.  Entries
# marked "simplified" request the "universal" tagset.
_CORPORA = {
    "Catalan: CESS-CAT Corpus (simplified)": lambda: cess_cat.tagged_sents(
        tagset="universal"
    ),
    "English: Brown Corpus": lambda: brown.tagged_sents(),
    "English: Brown Corpus (simplified)": lambda: brown.tagged_sents(
        tagset="universal"
    ),
    "English: Brown Corpus (Press, simplified)": lambda: brown.tagged_sents(
        categories=["news", "editorial", "reviews"], tagset="universal"
    ),
    "English: Brown Corpus (Religion, simplified)": lambda: brown.tagged_sents(
        categories="religion", tagset="universal"
    ),
    "English: Brown Corpus (Learned, simplified)": lambda: brown.tagged_sents(
        categories="learned", tagset="universal"
    ),
    "English: Brown Corpus (Science Fiction, simplified)": lambda: brown.tagged_sents(
        categories="science_fiction", tagset="universal"
    ),
    "English: Brown Corpus (Romance, simplified)": lambda: brown.tagged_sents(
        categories="romance", tagset="universal"
    ),
    "English: Brown Corpus (Humor, simplified)": lambda: brown.tagged_sents(
        categories="humor", tagset="universal"
    ),
    "English: NPS Chat Corpus": lambda: nps_chat.tagged_posts(),
    "English: NPS Chat Corpus (simplified)": lambda: nps_chat.tagged_posts(
        tagset="universal"
    ),
    "English: Wall Street Journal Corpus": lambda: treebank.tagged_sents(),
    "English: Wall Street Journal Corpus (simplified)": lambda: treebank.tagged_sents(
        tagset="universal"
    ),
    "Chinese: Sinica Corpus": lambda: sinica_treebank.tagged_sents(),
    "Chinese: Sinica Corpus (simplified)": lambda: sinica_treebank.tagged_sents(
        tagset="universal"
    ),
    "Dutch: Alpino Corpus": lambda: alpino.tagged_sents(),
    "Dutch: Alpino Corpus (simplified)": lambda: alpino.tagged_sents(
        tagset="universal"
    ),
    "Hindi: Indian Languages Corpus": lambda: indian.tagged_sents(files="hindi.pos"),
    "Hindi: Indian Languages Corpus (simplified)": lambda: indian.tagged_sents(
        files="hindi.pos", tagset="universal"
    ),
    "Portuguese: Floresta Corpus (Portugal)": lambda: floresta.tagged_sents(),
    "Portuguese: Floresta Corpus (Portugal, simplified)": lambda: floresta.tagged_sents(
        tagset="universal"
    ),
    "Portuguese: MAC-MORPHO Corpus (Brazil)": lambda: mac_morpho.tagged_sents(),
    "Portuguese: MAC-MORPHO Corpus (Brazil, simplified)": lambda: mac_morpho.tagged_sents(
        tagset="universal"
    ),
    "Spanish: CESS-ESP Corpus (simplified)": lambda: cess_esp.tagged_sents(
        tagset="universal"
    ),
}
|
||||
|
||||
|
||||
class ConcordanceSearchView:
|
||||
_BACKGROUND_COLOUR = "#FFF" # white
|
||||
|
||||
# Colour of highlighted results
|
||||
_HIGHLIGHT_WORD_COLOUR = "#F00" # red
|
||||
_HIGHLIGHT_WORD_TAG = "HL_WRD_TAG"
|
||||
|
||||
_HIGHLIGHT_LABEL_COLOUR = "#C0C0C0" # dark grey
|
||||
_HIGHLIGHT_LABEL_TAG = "HL_LBL_TAG"
|
||||
|
||||
# Fraction of the text that lies left of the horizontal scrollbar position
|
||||
_FRACTION_LEFT_TEXT = 0.30
|
||||
|
||||
def __init__(self):
    """Build the window, start loading the default corpus and begin polling."""
    events = q.Queue()
    self.queue = events
    self.model = ConcordanceSearchModel(events)
    root = Tk()
    self.top = root
    self._init_top(root)
    self._init_menubar()
    self._init_widgets(root)
    self.load_corpus(self.model.DEFAULT_CORPUS)
    # Handle of the pending Tk timer that runs _poll.
    self.after = root.after(POLL_INTERVAL, self._poll)
|
||||
|
||||
def _init_top(self, top):
|
||||
top.geometry("950x680+50+50")
|
||||
top.title("NLTK Concordance Search")
|
||||
top.bind("<Control-q>", self.destroy)
|
||||
top.protocol("WM_DELETE_WINDOW", self.destroy)
|
||||
top.minsize(950, 680)
|
||||
|
||||
def _init_widgets(self, parent):
    """Create the main frame and every widget group inside it, top to bottom."""
    frame_options = dict(
        background=self._BACKGROUND_COLOUR, padx=1, pady=1, border=1
    )
    frame = Frame(parent, frame_options)
    self.main_frame = frame
    builders = (
        self._init_corpus_select,
        self._init_query_box,
        self._init_results_box,
        self._init_paging,
        self._init_status,
    )
    for build in builders:
        build(frame)
    frame.pack(fill="both", expand=True)
|
||||
|
||||
def _init_menubar(self):
    """Create the File and Edit menus and apply the default option values."""
    root = self.top
    self._result_size = IntVar(root)
    self._cntx_bf_len = IntVar(root)
    self._cntx_af_len = IntVar(root)
    menubar = Menu(root)

    filemenu = Menu(menubar, tearoff=0, borderwidth=0)
    filemenu.add_command(
        label="Exit", underline=1, command=self.destroy, accelerator="Ctrl-q"
    )
    menubar.add_cascade(label="File", underline=0, menu=filemenu)

    def radio_menu(master, variable, command, values, suffix=""):
        # Build a menu with one radiobutton per value and invoke entry 1,
        # which selects the second value and runs `command` once.
        menu = Menu(master, tearoff=0)
        for value in values:
            menu.add_radiobutton(
                label=str(value) + suffix,
                variable=variable,
                underline=0,
                value=value,
                command=command,
            )
        menu.invoke(1)
        return menu

    editmenu = Menu(menubar, tearoff=0)
    rescntmenu = radio_menu(
        editmenu, self._result_size, self.set_result_size, (20, 50, 100)
    )
    editmenu.add_cascade(label="Result Count", underline=0, menu=rescntmenu)

    cntxmenu = Menu(editmenu, tearoff=0)
    cntxbfmenu = radio_menu(
        cntxmenu, self._cntx_bf_len, self.set_cntx_bf_len, (60, 80, 100), " characters"
    )
    cntxmenu.add_cascade(label="Before", underline=0, menu=cntxbfmenu)

    cntxafmenu = radio_menu(
        cntxmenu, self._cntx_af_len, self.set_cntx_af_len, (70, 90, 110), " characters"
    )
    cntxmenu.add_cascade(label="After", underline=0, menu=cntxafmenu)

    editmenu.add_cascade(label="Context", underline=0, menu=cntxmenu)

    menubar.add_cascade(label="Edit", underline=0, menu=editmenu)

    root.config(menu=menubar)
|
||||
|
||||
def set_result_size(self, **kwargs):
|
||||
self.model.result_count = self._result_size.get()
|
||||
|
||||
def set_cntx_af_len(self, **kwargs):
|
||||
self._char_after = self._cntx_af_len.get()
|
||||
|
||||
def set_cntx_bf_len(self, **kwargs):
|
||||
self._char_before = self._cntx_bf_len.get()
|
||||
|
||||
def _init_corpus_select(self, parent):
    """Create the "Corpus:" label and the drop-down listing every corpus.

    :param parent: widget that receives the new frame.
    """
    innerframe = Frame(parent, background=self._BACKGROUND_COLOUR)
    # Variable backing the drop-down; corpus_selected reads it.
    self.var = StringVar(innerframe)
    self.var.set(self.model.DEFAULT_CORPUS)
    Label(
        innerframe,
        justify=LEFT,
        text=" Corpus: ",
        background=self._BACKGROUND_COLOUR,
        padx=2,
        pady=1,
        border=0,
    ).pack(side="left")

    # The default corpus is listed first, followed by the remaining corpora
    # as returned by the model.  (A former `other_corpora` local held the
    # result of list.remove(), which is always None, and was never used.)
    om = OptionMenu(
        innerframe,
        self.var,
        self.model.DEFAULT_CORPUS,
        *self.model.non_default_corpora(),
        command=self.corpus_selected,
    )
    om["borderwidth"] = 0
    om["highlightthickness"] = 1
    om.pack(side="left")
    innerframe.pack(side="top", fill="x", anchor="n")
|
||||
|
||||
def _init_status(self, parent):
    """Create the status label at the bottom of ``parent``."""
    label = Label(
        parent,
        justify=LEFT,
        relief=SUNKEN,
        background=self._BACKGROUND_COLOUR,
        border=0,
        padx=1,
        pady=0,
    )
    self.status = label
    label.pack(side="top", anchor="sw")
|
||||
|
||||
def _init_query_box(self, parent):
    """Create the query entry and the Search button, centred in one row."""
    colour = self._BACKGROUND_COLOUR
    outer = Frame(parent, background=colour)
    row = Frame(outer, background=colour)
    # Entry and button share the same placement inside the row.
    placement = dict(side="left", fill="x", pady=25, anchor="center")

    self.query_box = Entry(row, width=60)
    self.query_box.pack(**placement)
    self.search_button = Button(
        row,
        text="Search",
        command=self.search,
        borderwidth=1,
        highlightthickness=1,
    )
    self.search_button.pack(**placement)
    # Pressing Return inside the entry starts a search as well.
    self.query_box.bind("<KeyPress-Return>", self.search_enter_keypress_handler)
    row.pack()
    outer.pack(side="top", fill="x", anchor="n")
|
||||
|
||||
def search_enter_keypress_handler(self, *event):
|
||||
self.search()
|
||||
|
||||
def _init_results_box(self, parent):
    """Create the read-only results text box, its highlight tags and scrollbars."""
    container = Frame(parent)
    text_row = Frame(container)
    scroll_row = Frame(container)
    v_bar = Scrollbar(text_row, borderwidth=1)
    h_bar = Scrollbar(scroll_row, borderwidth=1, orient="horiz")
    box = Text(
        text_row,
        font=Font(family="courier", size="16"),
        state="disabled",
        borderwidth=1,
        yscrollcommand=v_bar.set,
        xscrollcommand=h_bar.set,
        wrap="none",
        width="40",
        height="20",
        exportselection=1,
    )
    self.results_box = box
    box.pack(side="left", fill="both", expand=True)
    # Tags used by write_results to colour matched words and their labels.
    box.tag_config(self._HIGHLIGHT_WORD_TAG, foreground=self._HIGHLIGHT_WORD_COLOUR)
    box.tag_config(self._HIGHLIGHT_LABEL_TAG, foreground=self._HIGHLIGHT_LABEL_COLOUR)
    v_bar.pack(side="left", fill="y", anchor="e")
    v_bar.config(command=box.yview)
    h_bar.pack(side="left", fill="x", expand=True, anchor="w")
    h_bar.config(command=box.xview)
    # there is no other way of avoiding the overlap of scrollbars while using pack layout manager!!!
    spacer = Label(scroll_row, text="   ", background=self._BACKGROUND_COLOUR)
    spacer.pack(side="left", anchor="e")
    text_row.pack(side="top", fill="both", expand=True, anchor="n")
    scroll_row.pack(side="bottom", fill="x", anchor="s")
    container.pack(side="top", fill="both", expand=True)
|
||||
|
||||
def _init_paging(self, parent):
    """Create the Previous/Next buttons (initially disabled) and reset the page."""
    row = Frame(parent, background=self._BACKGROUND_COLOUR)
    # Options shared by both paging buttons.
    shared = dict(width="10", borderwidth=1, highlightthickness=1, state="disabled")

    self.prev = Button(row, text="Previous", command=self.previous, **shared)
    self.prev.pack(side="left", anchor="center")
    self.next = Button(row, text="Next", command=self.__next__, **shared)
    self.next.pack(side="right", anchor="center")
    row.pack(side="top", fill="y")
    self.current_page = 0
|
||||
|
||||
def previous(self):
|
||||
self.clear_results_box()
|
||||
self.freeze_editable()
|
||||
self.model.prev(self.current_page - 1)
|
||||
|
||||
def __next__(self):
|
||||
self.clear_results_box()
|
||||
self.freeze_editable()
|
||||
self.model.next(self.current_page + 1)
|
||||
|
||||
def about(self, *e):
    """Show the "About" text, preferring a Tk message box.

    If the message box cannot be imported or shown, the text is displayed
    in a ``ShowText`` window instead.
    """
    ABOUT = "NLTK Concordance Search Demo\n"
    TITLE = "About: NLTK Concordance Search Demo"
    try:
        from tkinter.messagebox import Message

        Message(message=ABOUT, title=TITLE, parent=self.main_frame).show()
    except Exception:
        # Fall back on ordinary errors only; a bare ``except`` would also
        # swallow KeyboardInterrupt and SystemExit.
        ShowText(self.top, TITLE, ABOUT)
|
||||
|
||||
def _bind_event_handlers(self):
    """Bind each model event name on the top-level window to its handler."""
    bindings = (
        (CORPUS_LOADED_EVENT, self.handle_corpus_loaded),
        (SEARCH_TERMINATED_EVENT, self.handle_search_terminated),
        (SEARCH_ERROR_EVENT, self.handle_search_error),
        (ERROR_LOADING_CORPUS_EVENT, self.handle_error_loading_corpus),
    )
    for event_name, handler in bindings:
        self.top.bind(event_name, handler)
|
||||
|
||||
def _poll(self):
    """Handle at most one queued model event, then reschedule itself."""
    try:
        event = self.queue.get(block=False)
    except q.Empty:
        # Nothing pending; None matches none of the event tokens below.
        event = None

    if event == CORPUS_LOADED_EVENT:
        self.handle_corpus_loaded(event)
    elif event == SEARCH_TERMINATED_EVENT:
        self.handle_search_terminated(event)
    elif event == SEARCH_ERROR_EVENT:
        self.handle_search_error(event)
    elif event == ERROR_LOADING_CORPUS_EVENT:
        self.handle_error_loading_corpus(event)

    # Keep polling for as long as the window lives.
    self.after = self.top.after(POLL_INTERVAL, self._poll)
|
||||
|
||||
def handle_error_loading_corpus(self, event):
|
||||
self.status["text"] = "Error in loading " + self.var.get()
|
||||
self.unfreeze_editable()
|
||||
self.clear_all()
|
||||
self.freeze_editable()
|
||||
|
||||
def handle_corpus_loaded(self, event):
|
||||
self.status["text"] = self.var.get() + " is loaded"
|
||||
self.unfreeze_editable()
|
||||
self.clear_all()
|
||||
self.query_box.focus_set()
|
||||
|
||||
def handle_search_terminated(self, event):
|
||||
# todo: refactor the model such that it is less state sensitive
|
||||
results = self.model.get_results()
|
||||
self.write_results(results)
|
||||
self.status["text"] = ""
|
||||
if len(results) == 0:
|
||||
self.status["text"] = "No results found for " + self.model.query
|
||||
else:
|
||||
self.current_page = self.model.last_requested_page
|
||||
self.unfreeze_editable()
|
||||
self.results_box.xview_moveto(self._FRACTION_LEFT_TEXT)
|
||||
|
||||
def handle_search_error(self, event):
|
||||
self.status["text"] = "Error in query " + self.model.query
|
||||
self.unfreeze_editable()
|
||||
|
||||
def corpus_selected(self, *args):
|
||||
new_selection = self.var.get()
|
||||
self.load_corpus(new_selection)
|
||||
|
||||
def load_corpus(self, selection):
    """
    Ask the model to load ``selection`` unless it is already selected.

    While the load is pending the status line shows a progress message and
    the editable widgets are frozen.

    :param selection: name of the corpus to load.
    """
    if self.model.selected_corpus == selection:
        return  # already the active corpus; nothing to do
    self.status["text"] = "Loading " + selection + "..."
    self.freeze_editable()
    self.model.load_corpus(selection)
|
||||
|
||||
def search(self):
    """
    Start a fresh search for the text in the query entry.

    The page counter, the results pane and the model's cached results are
    always reset. A query that is empty after stripping whitespace stops
    there; otherwise the form is frozen and the model is asked for the page
    after the (just reset) current page.
    """
    self.current_page = 0
    self.clear_results_box()
    self.model.reset_results()
    query = self.query_box.get()
    if len(query.strip()) != 0:
        self.status["text"] = "Searching for " + query
        self.freeze_editable()
        self.model.search(query, self.current_page + 1)
|
||||
|
||||
def write_results(self, results):
    """
    Write one line per result into the results pane and highlight the match.

    Each result is indexed as ``(sentence, start, end)``. The sentence is
    stripped, left-padded when the match starts too close to the beginning,
    and cut to a window of ``_char_before`` characters before the match
    start and ``_char_after`` characters after it. Word and label spans
    returned by ``words_and_labels`` are tagged for highlighting.

    :param results: sequence of ``(sentence, start, end)`` results.
    """
    box = self.results_box
    box["state"] = "normal"
    total = len(results)
    row = 1
    for result in results:
        sent = result[0].strip()
        pos1, pos2 = result[1], result[2]
        if len(sent) == 0:
            continue
        if pos1 < self._char_before:
            sent, pos1, pos2 = self.pad(sent, pos1, pos2)
        line = sent[pos1 - self._char_before : pos1 + self._char_after]
        # Every line except the one whose row number equals the result count
        # is newline-terminated.
        if row != total:
            line += "\n"
        prefix = str(row) + "."
        box.insert(prefix + "0", line)
        word_markers, label_markers = self.words_and_labels(sent, pos1, pos2)
        for first, last in ((m[0], m[1]) for m in word_markers):
            box.tag_add(
                self._HIGHLIGHT_WORD_TAG, prefix + str(first), prefix + str(last)
            )
        for first, last in ((m[0], m[1]) for m in label_markers):
            box.tag_add(
                self._HIGHLIGHT_LABEL_TAG, prefix + str(first), prefix + str(last)
            )
        row += 1
    box["state"] = "disabled"
|
||||
|
||||
def words_and_labels(self, sentence, pos1, pos2):
    """
    Locate the word and label parts of the matched text for highlighting.

    The matched text ``sentence[pos1:pos2]`` is split on single spaces into
    ``word/label`` pieces. Each piece is split at its *last* slash, so a word
    that itself contains a slash (e.g. ``1/2/CD``) no longer raises
    ``ValueError``; a piece with no slash at all is treated as a word with
    no label.

    :param sentence: the (already padded) tagged sentence.
    :param pos1: start offset of the match within ``sentence``.
    :param pos2: end offset of the match within ``sentence``.
    :return: ``(words, labels)``, two lists of ``(start, end)`` column pairs,
        each shifted right by ``self._char_before``.
    """
    shift = self._char_before
    words, labels = [], []
    index = 0  # offset of the current piece inside the matched text
    for piece in sentence[pos1:pos2].split(" "):
        if piece:
            word, slash, label = piece.rpartition("/")
            if not slash:
                # No tag separator: the whole piece is the word.
                word = piece
            words.append((shift + index, shift + index + len(word)))
            if slash:
                label_at = index + len(word) + 1  # skip the slash
                labels.append((shift + label_at, shift + label_at + len(label)))
        # Advance past the piece and the single space that followed it; an
        # empty piece (consecutive spaces) therefore advances by one.
        index += len(piece) + 1
    return words, labels
|
||||
|
||||
def pad(self, sent, hstart, hend):
    """
    Left-pad ``sent`` with spaces so the highlight starts at ``_char_before``.

    :param sent: the sentence text.
    :param hstart: start offset of the highlighted span.
    :param hend: end offset of the highlighted span.
    :return: ``(sent, hstart, hend)``; unchanged when ``hstart`` is already at
        or past ``self._char_before``, otherwise the padded sentence with both
        offsets shifted by the amount of padding.
    """
    missing = self._char_before - hstart
    if missing <= 0:
        return sent, hstart, hend
    return " " * missing + sent, hstart + missing, hend + missing
|
||||
|
||||
def destroy(self, *e):
    """
    Cancel the pending poll timer and destroy the top-level window.

    Safe to call more than once: after the first call ``self.top`` is
    ``None`` and later calls do nothing. Extra arguments are ignored.
    """
    if self.top is not None:
        self.top.after_cancel(self.after)
        self.top.destroy()
        self.top = None
|
||||
|
||||
def clear_all(self):
    """Empty the query entry, reset the model's query and clear the results pane."""
    entry = self.query_box
    entry.delete(0, END)
    self.model.reset_query()
    self.clear_results_box()
|
||||
|
||||
def clear_results_box(self):
    """
    Delete all text from the results pane.

    The pane's state is switched to "normal" for the deletion and back to
    "disabled" afterwards.
    """
    box = self.results_box
    box["state"] = "normal"
    box.delete("1.0", END)
    box["state"] = "disabled"
|
||||
|
||||
def freeze_editable(self):
    """Disable the query entry, the search button and both paging buttons."""
    for widget in (self.query_box, self.search_button, self.prev, self.next):
        widget["state"] = "disabled"
|
||||
|
||||
def unfreeze_editable(self):
    """
    Re-enable the query entry and search button, then refresh paging buttons.

    The paging buttons are not blindly enabled; their state is recomputed by
    ``set_paging_button_states``.
    """
    for widget in (self.query_box, self.search_button):
        widget["state"] = "normal"
    self.set_paging_button_states()
|
||||
|
||||
def set_paging_button_states(self):
    """
    Enable or disable the previous/next buttons for the current page.

    "Previous" is disabled on page 0 or 1; "next" is enabled only when the
    model reports more pages after the current one.
    """
    at_start = self.current_page in (0, 1)
    self.prev["state"] = "disabled" if at_start else "normal"
    more_pages = self.model.has_more_pages(self.current_page)
    self.next["state"] = "normal" if more_pages else "disabled"
|
||||
|
||||
def fire_event(self, event):
    """
    Queue ``event`` on the top-level window with ``when="tail"``.

    :param event: the event name to generate.
    """
    # Firing an event so that rendering of widgets happen in the mainloop thread
    window = self.top
    window.event_generate(event, when="tail")
|
||||
|
||||
def mainloop(self, *args, **kwargs):
    """
    Run the window's event loop, unless ``in_idle()`` reports true.

    All arguments are forwarded unchanged to the top-level window's
    ``mainloop``.
    """
    if not in_idle():
        self.top.mainloop(*args, **kwargs)
|
||||
|
||||
|
||||
class ConcordanceSearchModel:
    """
    Search state plus the worker threads that load corpora and run queries.

    Work happens on background threads (``LoadCorpus`` and ``SearchCorpus``);
    completion and failure are reported by putting event constants on the
    queue passed to the constructor.
    """

    def __init__(self, queue):
        """
        :param queue: queue on which worker threads post their event constants.
        """
        self.queue = queue
        self.CORPORA = _CORPORA
        self.DEFAULT_CORPUS = _DEFAULT
        self.selected_corpus = None
        self.reset_query()
        self.reset_results()
        # Left as None here; presumably assigned by the owner before the
        # first search (SearchCorpus compares against it) -- TODO confirm.
        self.result_count = None
        self.last_sent_searched = 0

    def non_default_corpora(self):
        """Return the corpus names other than the default one, sorted."""
        names = list(self.CORPORA.keys())
        names.remove(self.DEFAULT_CORPUS)
        names.sort()
        return names

    def load_corpus(self, name):
        """Select ``name``, drop any loaded sentences and load it on a thread."""
        self.selected_corpus = name
        self.tagged_sents = []
        loader = self.LoadCorpus(name, self)
        loader.start()

    def search(self, query, page):
        """Remember ``query`` and ``page`` and start a search thread for them."""
        self.query = query
        self.last_requested_page = page
        worker = self.SearchCorpus(self, page, self.result_count)
        worker.start()

    def next(self, page):
        """
        Request ``page``: search for it if it has not been stored yet,
        otherwise just signal that results are available.
        """
        self.last_requested_page = page
        if len(self.results) >= page:
            self.queue.put(SEARCH_TERMINATED_EVENT)
        else:
            self.search(self.query, page)

    def prev(self, page):
        """Request the already stored ``page`` and signal that it is available."""
        self.last_requested_page = page
        self.queue.put(SEARCH_TERMINATED_EVENT)

    def reset_results(self):
        """Forget all stored pages and restart scanning from the first sentence."""
        self.last_sent_searched = 0
        self.results = []
        self.last_page = None

    def reset_query(self):
        """Forget the current query."""
        self.query = None

    def set_results(self, page, resultset):
        """Store ``resultset`` at the list position for 1-based ``page``."""
        self.results.insert(page - 1, resultset)

    def get_results(self):
        """Return the stored results for the page that was last requested."""
        return self.results[self.last_requested_page - 1]

    def has_more_pages(self, page):
        """
        Tell whether a page after ``page`` may exist.

        False when nothing is stored or the first page is empty; True while
        the last page is still unknown; otherwise ``page < last_page``.
        """
        if self.results == [] or self.results[0] == []:
            return False
        return self.last_page is None or page < self.last_page

    class LoadCorpus(threading.Thread):
        """Thread that reads one corpus and stores it as ``word/tag`` strings."""

        def __init__(self, name, model):
            super().__init__()
            self.model, self.name = model, name

        def run(self):
            """Load the corpus; post a loaded event, or an error event on failure."""
            try:
                tagged = self.model.CORPORA[self.name]()
                lines = []
                for sent in tagged:
                    lines.append(" ".join(w + "/" + t for (w, t) in sent))
                self.model.tagged_sents = lines
                self.model.queue.put(CORPUS_LOADED_EVENT)
            except Exception as e:
                # Best effort: print the problem and let the owner report it.
                print(e)
                self.model.queue.put(ERROR_LOADING_CORPUS_EVENT)

    class SearchCorpus(threading.Thread):
        """Thread that collects one page of matches for the model's query."""

        def __init__(self, model, page, count):
            self.model, self.count, self.page = model, count, page
            super().__init__()

        def run(self):
            """
            Scan sentences from where the previous scan stopped.

            One match more than ``count`` is collected to detect that a
            further page exists; that extra match is dropped before storing.
            A regular-expression error resets the results and posts an error
            event instead.
            """
            model = self.model
            pattern = self.processed_query()
            hits = []
            scanned = 0
            for sent in model.tagged_sents[model.last_sent_searched :]:
                try:
                    found = re.search(pattern, sent)
                except re.error:
                    model.reset_results()
                    model.queue.put(SEARCH_ERROR_EVENT)
                    return
                if found:
                    hits.append((sent, found.start(), found.end()))
                    if len(hits) > self.count:
                        model.last_sent_searched += scanned - 1
                        break
                scanned += 1
            if self.count >= len(hits):
                # No surplus match was found, so this is recorded as the last page.
                model.last_sent_searched += scanned - 1
                model.last_page = self.page
                model.set_results(self.page, hits)
            else:
                model.set_results(self.page, hits[:-1])
            model.queue.put(SEARCH_TERMINATED_EVENT)

        def processed_query(self):
            """
            Translate the model's query into a regular expression.

            In every whitespace-separated term ``.`` is replaced by
            ``[^/ ]``. An all-uppercase term is matched as a tag, a term
            containing ``/`` is used as written, and any other term is
            matched as a word. Terms are joined with single spaces.
            """
            pieces = []
            for term in self.model.query.split():
                term = re.sub(r"\.", r"[^/ ]", term)
                if re.match("[A-Z]+$", term):
                    core = WORD_OR_TAG + "/" + term
                elif "/" in term:
                    core = term
                else:
                    core = term + "/" + WORD_OR_TAG
                pieces.append(BOUNDARY + core + BOUNDARY)
            return " ".join(pieces)
|
||||
|
||||
|
||||
def app():
    """Create the concordance search window and run its main loop."""
    view = ConcordanceSearchView()
    view.mainloop()


# Allow the module to be launched directly as a script.
if __name__ == "__main__":
    app()

# Only the launcher is part of the public interface.
__all__ = ["app"]
|
||||
163
Backend/venv/lib/python3.12/site-packages/nltk/app/nemo_app.py
Normal file
163
Backend/venv/lib/python3.12/site-packages/nltk/app/nemo_app.py
Normal file
@@ -0,0 +1,163 @@
|
||||
# Finding (and Replacing) Nemo, Version 1.1, Aristide Grange 2006/06/06
|
||||
# https://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/496783
|
||||
|
||||
"""
|
||||
Finding (and Replacing) Nemo
|
||||
|
||||
Instant Regular Expressions
|
||||
Created by Aristide Grange
|
||||
"""
|
||||
import itertools
|
||||
import re
|
||||
from tkinter import SEL_FIRST, SEL_LAST, Frame, Label, PhotoImage, Scrollbar, Text, Tk
|
||||
|
||||
windowTitle = "Finding (and Replacing) Nemo"
|
||||
initialFind = r"n(.*?)e(.*?)m(.*?)o"
|
||||
initialRepl = r"M\1A\2K\3I"
|
||||
initialText = """\
|
||||
Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.
|
||||
Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat.
|
||||
Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur.
|
||||
Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
|
||||
"""
|
||||
images = {
|
||||
"FIND": "R0lGODlhMAAiAPcAMf/////37//35//n1v97Off///f/9/f37/fexvfOvfeEQvd7QvdrQvdrKfdaKfdSMfdSIe/v9+/v7+/v5+/n3u/e1u/Wxu/Gre+1lO+tnO+thO+Ua+97Y+97Oe97Me9rOe9rMe9jOe9jMe9jIe9aMefe5+fe3ufezuece+eEWudzQudaIedSIedKMedKIedCKedCId7e1t7Wzt7Oxt7Gvd69vd69rd61pd6ljN6UjN6Ue96EY95zY95rUt5rQt5jMd5SId5KIdbn59be3tbGztbGvda1rdaEa9Z7a9Z7WtZzQtZzOdZzMdZjMdZaQtZSOdZSMdZKMdZCKdZCGNY5Ic7W1s7Oxs7Gtc69xs69tc69rc6tpc6llM6clM6cjM6Ue86EY85zWs5rSs5SKc5KKc5KGMa1tcatrcalvcalnMaUpcZ7c8ZzMcZrUsZrOcZrMcZaQsZSOcZSMcZKMcZCKcZCGMYxIcYxGL3Gxr21tb21rb2lpb2crb2cjL2UnL2UlL2UhL2Ec717Wr17Ur1zWr1rMb1jUr1KMb1KIb1CIb0xGLWlrbWlpbWcnLWEe7V7c7VzY7VzUrVSKbVKMbVCMbVCIbU5KbUxIbUxEK2lta2lpa2clK2UjK2MnK2MlK2Ea617e61za61rY61rMa1jSq1aUq1aSq1SQq1KKa0xEKWlnKWcnKWUnKWUhKWMjKWEa6Vza6VrWqVjMaVaUqVaKaVSMaVCMaU5KaUxIaUxGJyclJyMe5yElJyEhJx7e5x7c5xrOZxaQpxSOZxKQpw5IZSMhJSEjJR7c5Rre5RrY5RrUpRSQpRSKZRCOZRCKZQxKZQxIYyEhIx7hIxza4xzY4xrc4xjUoxaa4xaUoxSSoxKQoxCMYw5GIR7c4Rzc4Rre4RjY4RjWoRaa4RSWoRSUoRSMYRKQoRCOYQ5KYQxIXtra3taY3taSntKOXtCMXtCKXNCMXM5MXMxIWtSUmtKSmtKQmtCOWs5MWs5KWs5IWNCKWMxIVIxKUIQCDkhGAAAACH+AS4ALAAAAAAwACIAAAj/AAEIHEiwoMGDCBMqXMiwoUOHMqxIeEiRoZVp7cpZ29WrF4WKIAd208dGAQEVbiTVChUjZMU9+pYQmPmBZpxgvVw+nDdKwQICNVcIXQEkTgKdDdUJ+/nggVAXK1xI3TEA6UIr2uJ8iBqka1cXXTlkqGoVYRZ7iLyqBSs0iiEtZQVKiDGxBI1u3NR6lUpGDKg8MSgEQCphU7Z22vhg0dILXRCpYLuSCcYJT4wqXASBQaBzU7klHxC127OHD7ZDJFpERqRt0x5OnwQpmZmCLEhrbgg4WIHO1RY+nbQ9WRGEDJlmnXwJ+9FBgXMCIzYMVijBBgYMFxIMqJBMSc0Ht7qh/+Gjpte2rnYsYeNlasWIBgQ6yCewIoPCCp/cyP/wgUGbXVu0QcADZNBDnh98gHMLGXYQUw02w61QU3wdbNWDbQVVIIhMMwFF1DaZiPLBAy7E04kafrjSizaK3LFNNc0AAYRQDsAHHQlJ2IDQJ2zE1+EKDjiAijShkECCC8Qgw4cr7ZgyzC2WaHPNLWWoNeNWPiRAw0QFWQFMhz8C+QQ20yAiVSrY+MGOJCsccsst2GCzoHFxxEGGC+8hgs0MB2kyCpgzrUDCbs1Es41UdtATHFFkWELMOtsoQsYcgvRRQw5RSDgGOjZMR1AvPQIq6KCo9AKOJWDd48owQlHR4DXEKP9iyRrK+DNNBTu4RwIPFeTAGUG7hAomkA84gEg1m6ADljy9PBKGGJY4ig0xlsTBRSn98FOFDUC8pwQOPkgHbCGAzhTkA850s0c7j6Hjix9+gBIrMXLeAccWXUCyiRBcBEECdEJ98KtAqtBCYQc/OvDENnl4gYpUxISCIjjzylkGGV9okYUVNogRhAOBuuAEhjG08wOgDYzAgA5bCjIoCe5uwUk80RKTTSppPREGGGCIISOQ9AXBg6c
C6WIywvCpoMHAocRBwhP4bHLFLujYkV42xNxBRhAyGrc113EgYtRBerDDDHMoDCyQEL5sE083EkgwQyBhxGFHMM206DUixGxmE0wssbQjCQ4JCaFKFwgQTVAVVhQUwAVPIFJKrHfYYRwi6OCDzzuIJIFhXAD0EccPsYRiSyqKSDpFcWSMIcZRoBMkQyA2BGZDIKSYcggih8TRRg4VxM5QABVYYLxgwiev/PLMCxQQADs=",
|
||||
"find": "R0lGODlhMAAiAPQAMf////f39+/v7+fn597e3tbW1s7OzsbGxr29vbW1ta2traWlpZycnJSUlIyMjISEhHt7e3Nzc2tra2NjY1paWlJSUkpKSkJCQjk5OSkpKRgYGAAAAAAAAAAAAAAAAAAAACH+AS4ALAAAAAAwACIAAAX/ICCOZGmeaKquY2AGLiuvMCAUBuHWc48Kh0iFInEYCb4kSQCxPBiMxkMigRQEgJiSFVBYHNGG0RiZOHjblWAiiY4fkDhEYoBp06dAWfyAQyKAgAwDaHgnB0RwgYASgQ0IhDuGJDAIFhMRVFSLEX8QCJJ4AQM5AgQHTZqqjBAOCQQEkWkCDRMUFQsICQ4Vm5maEwwHOAsPDTpKMAsUDlO4CssTcb+2DAp8YGCyNFoCEsZwFQ3QDRTTVBRS0g1QbgsCd5QAAwgIBwYFAwStzQ8UEdCKVchky0yVBw7YuXkAKt4IAg74vXHVagqFBRgXSCAyYWAVCH0SNhDTitCJfSL5/4RbAPKPhQYYjVCYYAvCP0BxEDaD8CheAAHNwqh8MMGPSwgLeJWhwHSjqkYI+xg4MMCEgQjtRvZ7UAYCpghMF7CxONOWJkYR+rCpY4JlVpVxKDwYWEactKW9mhYRtqCTgwgWEMArERSK1j5q//6T8KXonFsShpiJkAECgQYVjykooCVA0JGHEWNiYCHThTFeb3UkoiCCBgwGEKQ1kuAJlhFwhA71h5SukwUM5qqeCSGBgicEWkfNiWSERtBad4JNIBaQBaQah1ToyGZBAnsIuIJs1qnqiAIVjIE2gnAB1T5x0icgzXT79ipgMOOEH6HBbREBMJCeGEY08IoLAkzB1YYFwjxwSUGSNULQJnNUwRYlCcyEkALIxECAP9cNMMABYpRhy3ZsSLDaR70oUAiABGCkAxowCGCAAfDYIQACXoElGRsdXWDBdg2Y90IWktDYGYAB9PWHP0PMdFZaF07SQgAFNDAMAQg0QA1UC8xoZQl22JGFPgWkOUCOL1pZQyhjxinnnCWEAAA7",
|
||||
"REPL": "R0lGODlhMAAjAPcAMf/////3//+lOf+UKf+MEPf///f39/f35/fv7/ecQvecOfecKfeUIfeUGPeUEPeUCPeMAO/37+/v9+/v3u/n3u/n1u+9jO+9c++1hO+ta++tY++tWu+tUu+tSu+lUu+lQu+lMe+UMe+UKe+UGO+UEO+UAO+MCOfv5+fvxufn7+fn5+fnzue9lOe9c+e1jOe1e+e1c+e1a+etWuetUuelQuecOeeUUueUCN7e597e3t7e1t7ezt7evd7Wzt7Oxt7Ovd7Otd7Opd7OnN7Gtd7Gpd69lN61hN6ta96lStbextberdbW3tbWztbWxtbOvdbOrda1hNalUtaECM7W1s7Ozs7Oxs7Otc7Gxs7Gvc69tc69rc69pc61jM6lc8bWlMbOvcbGxsbGpca9tca9pca1nMaMAL3OhL3Gtb21vb21tb2tpb2tnL2tlLW9tbW9pbW9e7W1pbWtjLWcKa21nK2tra2tnK2tlK2lpa2llK2ljK2le6WlnKWljKWUe6WUc6WUY5y1QpyclJycjJychJyUc5yMY5StY5SUe5SMhJSMe5SMc5SMWpSEa5SESoyUe4yMhIyEY4SlKYScWoSMe4SEe4SEa4R7c4R7Y3uMY3uEe3t7e3t7c3tza3tzY3trKXtjIXOcAHOUMXOEY3Nzc3NzWnNrSmulCGuUMWuMGGtzWmtrY2taMWtaGGOUOWOMAGNzUmNjWmNjSmNaUmNaQmNaOWNaIWNSCFqcAFpjUlpSMVpSIVpSEFpKKVKMAFJSUlJSSlJSMVJKMVJKGFJKAFI5CEqUAEqEAEpzQkpKIUpCQkpCGEpCAEo5EEoxAEJjOUJCOUJCAEI5IUIxADl7ADlaITlCOTkxMTkxKTkxEDkhADFzADFrGDE5OTExADEpEClrCCkxKSkpKSkpISkpACkhCCkhACkYACFzACFrACEhCCEYGBhjEBhjABghABgYCBgYABgQEBgQABAQABAIAAhjAAhSAAhKAAgIEAgICABaAABCAAAhAAAQAAAIAAAAAAAAACH+AS4ALAAAAAAwACMAAAj/AAEIHEiwoMGDCBMqXMiwocOHAA4cgEixIIIJO3JMmAjADIqKFU/8MHIkg5EgYXx4iaTkI0iHE6wE2TCggYILQayEAgXIy8uGCKz8sDCAQAMRG3iEcXULlJkJPwli3OFjh9UdYYLE6NBhA04UXHoVA2XoTZgfPKBWlOBDphAWOdfMcfMDLloeO3hIMjbWVCQ5Fn6E2UFxgpsgFjYIEBADrZU6luqEEfqjTqpt54z1uuWqTIcgWAk7PECGzIUQDRosDmxlUrVJkwQJkqVuX71v06YZcyUlROAdbnLAJKPFyAYFAhoMwFlnEh0rWkpz8raPHm7dqKKc/KFFkBUrVn1M/ziBcEIeLUEQI8/AYk0i9Be4sqjsrN66c9/OnbobhpR3HkIUoZ0WVnBE0AGLFKKFD0HAFUQe77HQgQI1hRBDEHMcY0899bBzihZuCPILJD8EccEGGzwAQhFaUHHQH82sUkgeNHISDBk8WCCCcsqFUEQWmOyzjz3sUGNNOO5Y48YOEgowAAQhnBScQV00k82V47jzjy9CXZBcjziFoco//4CDiSOyhPMPLkJZkEBqJmRQxA9uZGEQD8Ncmc044/zzDF2IZQBCCDYE8QMZz/iiCSx0neHGI7BIhhhNn+1gxRpokEcQAp7seWU7/PwTyxqG/iCEEVzQmUombnDRxRExzP9nBR2PCKLFD3UJwcMPa/SRqUGNWJmNOVn+M44ukMRB4KGcWDNLVhuUMEIJAlzwA3DJBHMJIXm4sQYhqyxCRQQGLSIsn1qac2UzysQSyzX/hLMGD0F0IMCODYAQBA9W/PKPOcRiw0wzwxTiokF9dLMnuv/Mo+fCZF7jBr0xbDDCACWEYKgb1vzjDp/jZNOMLX0IZxAKq2TZTjtaOjwOsXyG+s8sZJTIQsUdIGHoJPf8w487QI/TDSt
5mGwQFZxc406o8HiDJchk/ltLHpSlJwSvz5DpTjvmuGNOM57koelBOaAhiCaaPBLL0wwbm003peRBnBZqJMJL1ECz/HXYYx/NdAIOOVCxQyLorswymU93o0wuwfAiTDNR/xz0MLXU0XdCE+UwSTRZAq2lsSATu+4wkGvt+TjNzPLrQyegAUku2Hij5cd8LhxyM8QIg4w18HgcdC6BTBFSDmfQqsovttveDcG7lFLHI75cE841sARCxeWsnxC4G9HADPK6ywzDCRqBo0EHHWhMgT1IJzziNci1N7PMKnSYfML96/90AiJKey/0KtbLX1QK0rrNnQ541xugQ7SHhkXBghN0SKACWRc4KlAhBwKcIOYymJCAAAA7",
|
||||
"repl": "R0lGODlhMAAjAPQAMf////f39+/v7+fn597e3tbW1s7OzsbGxr29vbW1ta2traWlpZycnJSUlIyMjISEhHt7e3Nzc2tra2NjY1paWlJSUkpKSkJCQjk5OTExMSkpKSEhIRgYGBAQEAgICAAAACH+AS4ALAAAAAAwACMAAAX/ICCOZGmeaKqubOu+gCDANBkIQ1EMQhAghFptYEAkEgjEwXBo7ISvweGgWCwUysPjwTgEoCafTySYIhYMxgLBjEQgCULvCw0QdAZdoVhUIJUFChISEAxYeQM1N1OMTAp+UwZ5eA4TEhFbDWYFdC4ECVMJjwl5BwsQa0umEhUVlhESDgqlBp0rAn5nVpBMDxeZDRQbHBgWFBSWDgtLBnFjKwRYCI9VqQsPs0YKEcMXFq0UEalFDWx4BAO2IwPjppAKDkrTWKYUGd7fEJJFEZpM00cOzCgh4EE8SaoWxKNixQooBRMyZMBwAYIRBhUgLDGS4MoBJeoANMhAgQsaCRZm/5lqaCUJhA4cNHjDoKEDBlJUHqkBlYBTiQUZNGjYMMxDhY3VWk6R4MEDBoMUak5AqoYBqANIBo4wcGGDUKIeLlzVZmWJggsVIkwAZaQSA3kdZzlKkIiEAAlDvW5oOkEBs488JTw44oeUIwdvVTFTUK7uiAAPgubt8GFDhQepqETAQCFU1UMGzlqAgFhUsAcCS0AO6lUDhw8xNRSbENGDhgWSHjWUe6ACbKITizmopZoBa6KvOwj9uuHDhwxyj3xekgDDhw5EvWKo0IB4iQLCOCC/njc7ZQ8UeGvza+ABZZgcxJNc4FO1gc0cOsCUrHevc8tdIMTIAhc4F198G2Qwwd8CBIQUAwEINABBBJUwR9R5wElgVRLwWODBBx4cGB8GEzDQIAo33CGJA8gh+JoH/clUgQU0YvDhdfmJdwEFC6Sjgg8yEPAABsPkh2F22cl2AQbn6QdTghTQ5eAJAQyQAAQV0MSBB9gRVZ4GE1mw5JZOAmiAVi1UWcAZDrDyZXYTeaOhA/bIVuIBPtKQ4h7ViYekUPdcEAEbzTzCRp5CADmAAwj+ORGPBcgwAAHo9ABGCYtm0ChwFHShlRiXhmHlkAcCiOeUodqQw5W0oXLAiamy4MOkjOyAaqxUymApDCEAADs=",
|
||||
}
|
||||
colors = ["#FF7B39", "#80F121"]
|
||||
emphColors = ["#DAFC33", "#F42548"]
|
||||
fieldParams = {
|
||||
"height": 3,
|
||||
"width": 70,
|
||||
"font": ("monaco", 14),
|
||||
"highlightthickness": 0,
|
||||
"borderwidth": 0,
|
||||
"background": "white",
|
||||
}
|
||||
textParams = {
|
||||
"bg": "#F7E0D4",
|
||||
"fg": "#2321F1",
|
||||
"highlightthickness": 0,
|
||||
"width": 1,
|
||||
"height": 10,
|
||||
"font": ("verdana", 16),
|
||||
"wrap": "word",
|
||||
}
|
||||
|
||||
|
||||
class Zone:
    """
    One section of the window: an icon with an input field, and a text area.

    Subclasses are expected to supply ``substitute()``, which ``refresh``
    calls.
    """

    def __init__(self, image, initialField, initialText):
        """
        :param image: key of the dimmed icon in ``images``; its upper-cased
            form is the key of the normal icon.
        :param initialField: initial contents of the input field.
        :param initialText: initial contents of the text area.
        """
        # Top row: icon label followed by the input field.
        field_frame = Frame(root)
        field_frame.config(background="white")
        self.image = PhotoImage(format="gif", data=images[image.upper()])
        self.imageDimmed = PhotoImage(format="gif", data=images[image])
        self.img = Label(field_frame)
        self.img.config(borderwidth=0)
        self.img.pack(side="left")
        self.fld = Text(field_frame, **fieldParams)
        self.initScrollText(field_frame, self.fld, initialField)

        # Bottom row: the text area with its highlight tags.
        text_frame = Frame(root)
        self.txt = Text(text_frame, **textParams)
        self.initScrollText(text_frame, self.txt, initialText)
        for slot in (0, 1):
            shade = colors[slot]
            self.txt.tag_config(shade, background=shade)
            self.txt.tag_config("emph" + shade, foreground=emphColors[slot])

    def initScrollText(self, frm, txt, contents):
        """
        Pack ``txt`` inside ``frm`` with a scrollbar and fill it with ``contents``.

        A thin ridge-style frame is packed afterwards as a separator.
        """
        scrollbar = Scrollbar(frm)
        scrollbar.config(command=txt.yview)
        scrollbar.pack(side="right", fill="y")
        txt.pack(side="left", expand=True, fill="x")
        txt.config(yscrollcommand=scrollbar.set)
        txt.insert("1.0", contents)
        frm.pack(fill="x")
        separator = Frame(height=2, bd=1, relief="ridge")
        separator.pack(fill="x")

    def refresh(self):
        """
        Restart the colour cycle and redo the substitution.

        The normal icon is shown on success, the dimmed icon when the
        substitution raises ``re.error``.
        """
        self.colorCycle = itertools.cycle(colors)
        try:
            self.substitute()
        except re.error:
            self.img.config(image=self.imageDimmed)
        else:
            self.img.config(image=self.image)
|
||||
|
||||
|
||||
class FindZone(Zone):
    """Zone whose field holds the search pattern; matches are coloured in its text."""

    def addTags(self, m):
        """
        Colour one match, and the selected sub-pattern inside it if there is one.

        Used as the replacement callback of ``re.sub``; it returns ``None``.

        :param m: the match object for the current match.
        """
        # Imported locally because the module's import line does not bring it in.
        from tkinter import TclError

        color = next(self.colorCycle)
        self.txt.tag_add(color, "1.0+%sc" % m.start(), "1.0+%sc" % m.end())
        try:
            self.txt.tag_add(
                "emph" + color, "1.0+%sc" % m.start("emph"), "1.0+%sc" % m.end("emph")
            )
        except (IndexError, TclError):
            # IndexError: the pattern in use has no "emph" group (nothing is
            # selected in the field). TclError: the widget rejected the index.
            # Either way there is simply nothing extra to emphasise.
            pass

    def substitute(self, *args):
        """
        Recompile the pattern from the field and re-colour every match.

        ``self.rex`` is the pattern as typed. ``self.rexSel`` is the same
        pattern with the selected part of the field wrapped in a named
        ``emph`` group; it falls back to ``self.rex`` when there is no
        selection or the wrapped pattern does not compile.

        :raises re.error: if the pattern typed in the field is malformed.
        """
        from tkinter import TclError

        for color in colors:
            self.txt.tag_remove(color, "1.0", "end")
            self.txt.tag_remove("emph" + color, "1.0", "end")
        self.rex = re.compile("")  # default value in case of malformed regexp
        self.rex = re.compile(self.fld.get("1.0", "end")[:-1], re.MULTILINE)
        try:
            # The selection must be a valid pattern on its own before it is wrapped.
            re.compile("(?P<emph>%s)" % self.fld.get(SEL_FIRST, SEL_LAST))
            self.rexSel = re.compile(
                "%s(?P<emph>%s)%s"
                % (
                    self.fld.get("1.0", SEL_FIRST),
                    self.fld.get(SEL_FIRST, SEL_LAST),
                    self.fld.get(SEL_LAST, "end")[:-1],
                ),
                re.MULTILINE,
            )
        except (TclError, re.error):
            # TclError: nothing is selected; re.error: the selection does not
            # form a valid sub-pattern. Fall back to the plain pattern.
            self.rexSel = self.rex
        self.rexSel.sub(self.addTags, self.txt.get("1.0", "end"))
|
||||
|
||||
|
||||
class ReplaceZone(Zone):
    """Zone whose field holds the replacement template; its text shows the result."""

    def addTags(self, m):
        """
        Replace one match in the text area with its expansion and colour it.

        The replacement is produced with ``m.expand`` on the original match.
        Re-running the pattern on the isolated match text would lose the
        surrounding context (anchors, lookaround) and could substitute twice
        for patterns that also match the empty string.

        Used as the replacement callback of ``re.sub``; it returns ``None``.

        :param m: match object found in the find zone's text.
        :raises re.error: if the template refers to an invalid group.
        """
        replacement = m.expand(self.repl)
        # self.diff is the net length change caused by earlier replacements,
        # so match offsets from the original text map onto the edited text.
        start = m.start() + self.diff
        end = m.end() + self.diff
        self.txt.delete("1.0+%sc" % start, "1.0+%sc" % end)
        self.txt.insert("1.0+%sc" % start, replacement, next(self.colorCycle))
        self.diff += len(replacement) - (m.end() - m.start())

    def substitute(self):
        """
        Copy the find zone's text, then replace every match of its pattern.

        Numeric back-references such as ``\\1`` in the field are rewritten to
        the ``\\g<1>`` form before use.
        """
        self.txt.delete("1.0", "end")
        self.txt.insert("1.0", sz.txt.get("1.0", "end")[:-1])
        self.diff = 0
        self.repl = rex0.sub(r"\\g<\1>", self.fld.get("1.0", "end")[:-1])
        sz.rex.sub(self.addTags, sz.txt.get("1.0", "end")[:-1])
|
||||
|
||||
|
||||
def launchRefresh(_):
    """
    Schedule a refresh of the find zone, then the replace zone, at idle time.

    The single argument (the event, when used as a binding) is ignored.
    """
    for zone in (sz, rz):
        zone.fld.after_idle(zone.refresh)
|
||||
|
||||
|
||||
def app():
    """
    Build the Nemo window and run its main loop.

    The window, both zones and the back-reference pattern are stored in
    module globals because the zone classes and ``launchRefresh`` read them.
    """
    global root, sz, rz, rex0
    root = Tk()
    root.resizable(height=False, width=True)
    root.title(windowTitle)
    root.minsize(width=250, height=0)

    sz = FindZone("find", initialFind, initialText)
    # Mouse activity in the find field can change the selection, so refresh.
    for sequence in ("<Button-1>", "<ButtonRelease-1>", "<B1-Motion>"):
        sz.fld.bind(sequence, launchRefresh)
    sz.rexSel = re.compile("")

    rz = ReplaceZone("repl", initialRepl, "")
    # A backslash-number reference that is not itself preceded by a backslash.
    rex0 = re.compile(r"(?<!\\)\\([0-9]+)")

    root.bind_all("<Key>", launchRefresh)
    launchRefresh(None)
    root.mainloop()


# Allow the module to be launched directly as a script.
if __name__ == "__main__":
    app()

# Only the launcher is part of the public interface.
__all__ = ["app"]
|
||||
1052
Backend/venv/lib/python3.12/site-packages/nltk/app/rdparser_app.py
Normal file
1052
Backend/venv/lib/python3.12/site-packages/nltk/app/rdparser_app.py
Normal file
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,937 @@
|
||||
# Natural Language Toolkit: Shift-Reduce Parser Application
|
||||
#
|
||||
# Copyright (C) 2001-2025 NLTK Project
|
||||
# Author: Edward Loper <edloper@gmail.com>
|
||||
# URL: <https://www.nltk.org/>
|
||||
# For license information, see LICENSE.TXT
|
||||
|
||||
"""
|
||||
A graphical tool for exploring the shift-reduce parser.
|
||||
|
||||
The shift-reduce parser maintains a stack, which records the structure
|
||||
of the portion of the text that has been parsed. The stack is
|
||||
initially empty. Its contents are shown on the left side of the main
|
||||
canvas.
|
||||
|
||||
On the right side of the main canvas is the remaining text. This is
|
||||
the portion of the text which has not yet been considered by the
|
||||
parser.
|
||||
|
||||
The parser builds up a tree structure for the text using two
|
||||
operations:
|
||||
|
||||
- "shift" moves the first token from the remaining text to the top
|
||||
of the stack. In the demo, the top of the stack is its right-hand
|
||||
side.
|
||||
- "reduce" uses a grammar production to combine the rightmost stack
|
||||
elements into a single tree token.
|
||||
|
||||
You can control the parser's operation by using the "shift" and
|
||||
"reduce" buttons; or you can use the "step" button to let the parser
|
||||
automatically decide which operation to apply. The parser uses the
|
||||
following rules to decide which operation to apply:
|
||||
|
||||
- Only shift if no reductions are available.
|
||||
- If multiple reductions are available, then apply the reduction
|
||||
whose CFG production is listed earliest in the grammar.
|
||||
|
||||
The "reduce" button applies the reduction whose CFG production is
|
||||
listed earliest in the grammar. There are two ways to manually choose
|
||||
which reduction to apply:
|
||||
|
||||
- Click on a CFG production from the list of available reductions,
|
||||
on the left side of the main window. The reduction based on that
|
||||
production will be applied to the top of the stack.
|
||||
- Click on one of the stack elements. A popup window will appear,
|
||||
containing all available reductions. Select one, and it will be
|
||||
applied to the top of the stack.
|
||||
|
||||
Note that reductions can only be applied to the top of the stack.
|
||||
|
||||
Keyboard Shortcuts::
|
||||
[Space]\t Perform the next shift or reduce operation
|
||||
[s]\t Perform a shift operation
|
||||
[r]\t Perform a reduction operation
|
||||
[Ctrl-z]\t Undo most recent operation
|
||||
[Delete]\t Reset the parser
|
||||
[g]\t Show/hide available production list
|
||||
[Ctrl-a]\t Toggle animations
|
||||
[h]\t Help
|
||||
[Ctrl-p]\t Print
|
||||
[q]\t Quit
|
||||
|
||||
"""
|
||||
|
||||
from tkinter import Button, Frame, IntVar, Label, Listbox, Menu, Scrollbar, Tk
|
||||
from tkinter.font import Font
|
||||
|
||||
from nltk.draw import CFGEditor, TreeSegmentWidget, tree_to_treesegment
|
||||
from nltk.draw.util import CanvasFrame, EntryDialog, ShowText, TextWidget
|
||||
from nltk.parse import SteppingShiftReduceParser
|
||||
from nltk.tree import Tree
|
||||
from nltk.util import in_idle
|
||||
|
||||
"""
|
||||
Possible future improvements:
|
||||
- button/window to change and/or select text. Just pop up a window
|
||||
with an entry, and let them modify the text; and then retokenize
|
||||
it? Maybe give a warning if it contains tokens whose types are
|
||||
not in the grammar.
|
||||
- button/window to change and/or select grammar. Select from
|
||||
several alternative grammars? Or actually change the grammar? If
|
||||
the latter, then I'd want to define nltk.draw.cfg, which would be
|
||||
responsible for that.
|
||||
"""
|
||||
|
||||
|
||||
class ShiftReduceApp:
|
||||
"""
|
||||
A graphical tool for exploring the shift-reduce parser. The tool
|
||||
displays the parser's stack and the remaining text, and allows the
|
||||
user to control the parser's operation. In particular, the user
|
||||
can shift tokens onto the stack, and can perform reductions on the
|
||||
top elements of the stack. A "step" button simply steps through
|
||||
the parsing process, performing the operations that
|
||||
``nltk.parse.ShiftReduceParser`` would use.
|
||||
"""
|
||||
|
||||
def __init__(self, grammar, sent, trace=0):
    """
    Build the application window for parsing ``sent`` with ``grammar``.

    :param grammar: grammar handed to ``SteppingShiftReduceParser``.
    :param sent: the text to parse, stored as given.
    :param trace: trace level handed to the parser.
    """
    self._sent = sent
    self._parser = SteppingShiftReduceParser(grammar, trace)

    # Set up the main window.
    top = self._top = Tk()
    top.title("Shift Reduce Parser Application")

    # Animations.  animating_lock is a lock to prevent the demo
    # from performing new operations while it's animating.
    self._animating_lock = 0
    self._animate = IntVar(top)
    self._animate.set(10)  # = medium

    # The user can hide the grammar.
    self._show_grammar = IntVar(top)
    self._show_grammar.set(1)

    # Initialize fonts.
    self._init_fonts(top)

    # Set up key bindings.
    self._init_bindings()

    # Create the basic frames, in this order.
    self._init_menubar(top)
    self._init_buttons(top)
    self._init_feedback(top)
    self._init_grammar(top)
    self._init_canvas(top)

    # A popup menu for reducing.
    self._reduce_menu = Menu(self._canvas, tearoff=0)

    # Reset the demo, and set the feedback frame to empty.
    self.reset()
    self._lastoper1["text"] = ""
|
||||
|
||||
#########################################
|
||||
## Initialization Helpers
|
||||
#########################################
|
||||
|
||||
def _init_fonts(self, root):
    """
    Create the fonts used by the application.

    The system font is taken from a default ``Button`` and installed as the
    ``*Font`` option on ``root``; the regular and bold helvetica fonts start
    at that font's size.

    :param root: the top-level window.
    """
    # See: <http://www.astro.washington.edu/owen/ROTKFolklore.html>
    self._sysfont = Font(font=Button()["font"])
    root.option_add("*Font", self._sysfont)

    # What's our font size (default=same as sysfont)
    self._size = IntVar(root)
    self._size.set(self._sysfont.cget("size"))

    point_size = self._size.get()
    self._boldfont = Font(family="helvetica", weight="bold", size=point_size)
    self._font = Font(family="helvetica", size=self._size.get())
|
||||
|
||||
def _init_grammar(self, parent):
    """
    Build the "Available Reductions" panel listing the grammar's productions.

    :param parent: widget that the panel's frame is packed into.
    """
    # Grammar view.
    self._prodframe = Frame(parent)
    self._prodframe.pack(fill="both", side="left", padx=2)
    self._prodlist_label = Label(
        self._prodframe, font=self._boldfont, text="Available Reductions"
    )
    self._prodlist_label.pack()
    self._prodlist = Listbox(
        self._prodframe,
        selectmode="single",
        relief="groove",
        background="white",
        foreground="#909090",
        font=self._font,
        selectforeground="#004040",
        selectbackground="#c0f0c0",
    )

    self._prodlist.pack(side="right", fill="both", expand=1)

    self._productions = list(self._parser.grammar().productions())
    for production in self._productions:
        self._prodlist.insert("end", (" %s" % production))
    # Show every production, up to a maximum of 25 visible rows.
    self._prodlist.config(height=min(len(self._productions), 25))

    # The scrollbar is always added, whatever the number of productions.
    listscroll = Scrollbar(self._prodframe, orient="vertical")
    self._prodlist.config(yscrollcommand=listscroll.set)
    listscroll.config(command=self._prodlist.yview)
    listscroll.pack(side="left", fill="y")

    # If they select a production, apply it.
    self._prodlist.bind("<<ListboxSelect>>", self._prodlist_select)

    # When they hover over a production, highlight it.
    self._hover = -1
    self._prodlist.bind("<Motion>", self._highlight_hover)
    self._prodlist.bind("<Leave>", self._clear_hover)
|
||||
|
||||
def _init_bindings(self):
|
||||
# Quit
|
||||
self._top.bind("<Control-q>", self.destroy)
|
||||
self._top.bind("<Control-x>", self.destroy)
|
||||
self._top.bind("<Alt-q>", self.destroy)
|
||||
self._top.bind("<Alt-x>", self.destroy)
|
||||
|
||||
# Ops (step, shift, reduce, undo)
|
||||
self._top.bind("<space>", self.step)
|
||||
self._top.bind("<s>", self.shift)
|
||||
self._top.bind("<Alt-s>", self.shift)
|
||||
self._top.bind("<Control-s>", self.shift)
|
||||
self._top.bind("<r>", self.reduce)
|
||||
self._top.bind("<Alt-r>", self.reduce)
|
||||
self._top.bind("<Control-r>", self.reduce)
|
||||
self._top.bind("<Delete>", self.reset)
|
||||
self._top.bind("<u>", self.undo)
|
||||
self._top.bind("<Alt-u>", self.undo)
|
||||
self._top.bind("<Control-u>", self.undo)
|
||||
self._top.bind("<Control-z>", self.undo)
|
||||
self._top.bind("<BackSpace>", self.undo)
|
||||
|
||||
# Misc
|
||||
self._top.bind("<Control-p>", self.postscript)
|
||||
self._top.bind("<Control-h>", self.help)
|
||||
self._top.bind("<F1>", self.help)
|
||||
self._top.bind("<Control-g>", self.edit_grammar)
|
||||
self._top.bind("<Control-t>", self.edit_sentence)
|
||||
|
||||
# Animation speed control
|
||||
self._top.bind("-", lambda e, a=self._animate: a.set(20))
|
||||
self._top.bind("=", lambda e, a=self._animate: a.set(10))
|
||||
self._top.bind("+", lambda e, a=self._animate: a.set(4))
|
||||
|
||||
def _init_buttons(self, parent):
    """
    Create the Step / Shift / Reduce / Undo button row.

    :param parent: widget that the button frame is packed into.
    """
    # Set up the frames.
    self._buttonframe = buttonframe = Frame(parent)
    buttonframe.pack(fill="none", side="bottom")

    # One entry per button, left to right. "Step" is the only button
    # created without an underlined letter.
    button_specs = (
        dict(text="Step", background="#90c0d0", command=self.step),
        dict(text="Shift", underline=0, background="#90f090", command=self.shift),
        dict(text="Reduce", underline=0, background="#90f090", command=self.reduce),
        dict(text="Undo", underline=0, background="#f0a0a0", command=self.undo),
    )
    for spec in button_specs:
        Button(buttonframe, foreground="black", **spec).pack(side="left")
|
||||
|
||||
def _init_menubar(self, parent):
    """
    Create the File / Edit / Apply / View / Animate / Help menus and
    install the resulting menubar on ``parent``.
    """

    def add_commands(menu, entries):
        # ``None`` stands for a separator; anything else is a
        # (label, underline, command, accelerator) tuple.
        for entry in entries:
            if entry is None:
                menu.add_separator()
                continue
            label, underline, command, accelerator = entry
            menu.add_command(
                label=label,
                underline=underline,
                command=command,
                accelerator=accelerator,
            )

    menubar = Menu(parent)

    # File menu
    filemenu = Menu(menubar, tearoff=0)
    add_commands(
        filemenu,
        [
            ("Reset Parser", 0, self.reset, "Del"),
            ("Print to Postscript", 0, self.postscript, "Ctrl-p"),
            ("Exit", 1, self.destroy, "Ctrl-x"),
        ],
    )
    menubar.add_cascade(label="File", underline=0, menu=filemenu)

    # Edit menu
    editmenu = Menu(menubar, tearoff=0)
    add_commands(
        editmenu,
        [
            ("Edit Grammar", 5, self.edit_grammar, "Ctrl-g"),
            ("Edit Text", 5, self.edit_sentence, "Ctrl-t"),
        ],
    )
    menubar.add_cascade(label="Edit", underline=0, menu=editmenu)

    # Apply menu (parser operations)
    rulemenu = Menu(menubar, tearoff=0)
    add_commands(
        rulemenu,
        [
            ("Step", 1, self.step, "Space"),
            None,
            ("Shift", 0, self.shift, "Ctrl-s"),
            ("Reduce", 0, self.reduce, "Ctrl-r"),
            None,
            ("Undo", 0, self.undo, "Ctrl-u"),
        ],
    )
    menubar.add_cascade(label="Apply", underline=0, menu=rulemenu)

    # View menu: grammar visibility toggle plus the font-size choices
    viewmenu = Menu(menubar, tearoff=0)
    viewmenu.add_checkbutton(
        label="Show Grammar",
        underline=0,
        variable=self._show_grammar,
        command=self._toggle_grammar,
    )
    viewmenu.add_separator()
    font_sizes = (
        ("Tiny", 10),
        ("Small", 12),
        ("Medium", 14),
        ("Large", 18),
        ("Huge", 24),
    )
    for label, points in font_sizes:
        viewmenu.add_radiobutton(
            label=label,
            variable=self._size,
            underline=0,
            value=points,
            command=self.resize,
        )
    menubar.add_cascade(label="View", underline=0, menu=viewmenu)

    # Animate menu: the radio value is stored in self._animate
    animatemenu = Menu(menubar, tearoff=0)
    animatemenu.add_radiobutton(
        label="No Animation", underline=0, variable=self._animate, value=0
    )
    animation_speeds = (
        ("Slow Animation", 20, "-"),
        ("Normal Animation", 10, "="),
        ("Fast Animation", 4, "+"),
    )
    for label, frames, key in animation_speeds:
        animatemenu.add_radiobutton(
            label=label,
            underline=0,
            variable=self._animate,
            value=frames,
            accelerator=key,
        )
    menubar.add_cascade(label="Animate", underline=1, menu=animatemenu)

    # Help menu
    helpmenu = Menu(menubar, tearoff=0)
    helpmenu.add_command(label="About", underline=0, command=self.about)
    helpmenu.add_command(
        label="Instructions", underline=0, command=self.help, accelerator="F1"
    )
    menubar.add_cascade(label="Help", underline=0, menu=helpmenu)

    parent.config(menu=menubar)
|
||||
|
||||
def _init_feedback(self, parent):
    """
    Build the "Last Operation" status strip at the bottom of ``parent``:
    a caption label followed by a sunken frame holding two text labels
    (``_lastoper1`` for the operation name, ``_lastoper2`` for its detail).
    """
    strip = Frame(parent)
    self._feedbackframe = strip
    strip.pack(fill="x", side="bottom", padx=3, pady=3)

    self._lastoper_label = Label(strip, text="Last Operation:", font=self._font)
    self._lastoper_label.pack(side="left")

    sunken = Frame(strip, relief="sunken", border=1)
    sunken.pack(fill="x", side="right", expand=1, padx=5)

    # Options shared by the two labels inside the sunken frame.
    common = {"background": "#f0f0f0", "font": self._font}
    self._lastoper1 = Label(sunken, foreground="#007070", **common)
    self._lastoper2 = Label(
        sunken, anchor="w", width=30, foreground="#004040", **common
    )
    self._lastoper1.pack(side="left")
    self._lastoper2.pack(side="left", fill="x", expand=1)
|
||||
|
||||
def _init_canvas(self, parent):
    """
    Create the drawing canvas inside ``parent`` together with the
    permanent canvas items: the title bar rectangle, the dashed
    expression line, the stack-top divider line and the "Stack" /
    "Remaining Text" heading widgets.

    The lists of per-token widgets (``_stackwidgets`` and
    ``_rtextwidgets``) start out empty.  All items are created with
    placeholder coordinates (0, 0, 0, 0).
    """
    self._cframe = CanvasFrame(
        parent,
        background="white",
        width=525,
        closeenough=10,
        border=2,
        relief="sunken",
    )
    self._cframe.pack(expand=1, fill="both", side="top", pady=2)
    canvas = self._canvas = self._cframe.canvas()

    self._stackwidgets = []
    self._rtextwidgets = []

    # Fixed decorations.
    self._titlebar = canvas.create_rectangle(
        0, 0, 0, 0, fill="#c0f0f0", outline="black"
    )
    self._exprline = canvas.create_line(0, 0, 0, 0, dash=".")
    self._stacktop = canvas.create_line(0, 0, 0, 0, fill="#408080")

    # Headings for the two halves of the display.
    self._stacklabel = TextWidget(
        canvas, "Stack", color="#004040", font=self._boldfont
    )
    self._rtextlabel = TextWidget(
        canvas, "Remaining Text", color="#004040", font=self._boldfont
    )
    self._cframe.add_widget(self._stacklabel)
    self._cframe.add_widget(self._rtextlabel)
|
||||
|
||||
#########################################
|
||||
## Main draw procedure
|
||||
#########################################
|
||||
|
||||
def _redraw(self):
    """
    Rebuild the whole canvas display from the parser's current state.

    All existing stack and remaining-text widgets are destroyed and
    recreated from ``self._parser.stack()`` and
    ``self._parser.remaining_text()``; the title bar, heading labels
    and stack-top divider are repositioned; click/drag bindings are
    installed; and the grammar listbox highlighting is refreshed.
    """
    # Only cx2 (the right edge of the scroll region) is used below.
    scrollregion = self._canvas["scrollregion"].split()
    (cx1, cy1, cx2, cy2) = (int(c) for c in scrollregion)

    # Delete the old stack & rtext widgets.
    for stackwidget in self._stackwidgets:
        self._cframe.destroy_widget(stackwidget)
    self._stackwidgets = []
    for rtextwidget in self._rtextwidgets:
        self._cframe.destroy_widget(rtextwidget)
    self._rtextwidgets = []

    # Position the titlebar & exprline
    # ``y`` (heading height + 10) is also the y-coordinate at which
    # every token widget is placed further down.
    (x1, y1, x2, y2) = self._stacklabel.bbox()
    y = y2 - y1 + 10
    self._canvas.coords(self._titlebar, -5000, 0, 5000, y - 4)
    self._canvas.coords(self._exprline, 0, y * 2 - 10, 5000, y * 2 - 10)

    # Position the titlebar labels..
    # The moves are relative, so each label's current bbox is read first.
    (x1, y1, x2, y2) = self._stacklabel.bbox()
    self._stacklabel.move(5 - x1, 3 - y1)
    (x1, y1, x2, y2) = self._rtextlabel.bbox()
    self._rtextlabel.move(cx2 - x2 - 5, 3 - y1)

    # Draw the stack.
    # Widgets are laid out left to right; ``stackx`` tracks the x
    # position for the next one (right edge of the previous + 10).
    stackx = 5
    for tok in self._parser.stack():
        if isinstance(tok, Tree):
            attribs = {
                "tree_color": "#4080a0",
                "tree_width": 2,
                "node_font": self._boldfont,
                "node_color": "#006060",
                "leaf_color": "#006060",
                "leaf_font": self._font,
            }
            widget = tree_to_treesegment(self._canvas, tok, **attribs)
            widget.label()["color"] = "#000000"
        else:
            widget = TextWidget(self._canvas, tok, color="#000000", font=self._font)
        # Clicking any stack element offers the available reductions.
        widget.bind_click(self._popup_reduce)
        self._stackwidgets.append(widget)
        self._cframe.add_widget(widget, stackx, y)
        stackx = widget.bbox()[2] + 10

    # Draw the remaining text.
    # Initially laid out from x=0; shifted to the right edge below.
    rtextwidth = 0
    for tok in self._parser.remaining_text():
        widget = TextWidget(self._canvas, tok, color="#000000", font=self._font)
        self._rtextwidgets.append(widget)
        self._cframe.add_widget(widget, rtextwidth, y)
        rtextwidth = widget.bbox()[2] + 4

    # Allow enough room to shift the next token (for animations)
    if len(self._rtextwidgets) > 0:
        stackx += self._rtextwidgets[0].width()

    # Move the remaining text to the correct location (keep it
    # right-justified, when possible); and move the remaining text
    # label, if necessary.
    stackx = max(stackx, self._stacklabel.width() + 25)
    rlabelwidth = self._rtextlabel.width() + 10
    # If the stack would overlap the remaining text, push the right
    # edge outwards instead.
    if stackx >= cx2 - max(rtextwidth, rlabelwidth):
        cx2 = stackx + max(rtextwidth, rlabelwidth)
    for rtextwidget in self._rtextwidgets:
        rtextwidget.move(4 + cx2 - rtextwidth, 0)
    self._rtextlabel.move(cx2 - self._rtextlabel.bbox()[2] - 5, 0)

    # Place the vertical divider midway between the end of the stack
    # area and the start of the remaining-text area.
    midx = (stackx + cx2 - max(rtextwidth, rlabelwidth)) / 2
    self._canvas.coords(self._stacktop, midx, 0, midx, 5000)
    # NOTE(review): the values unpacked here are not used afterwards.
    (x1, y1, x2, y2) = self._stacklabel.bbox()

    # Set up binding to allow them to shift a token by dragging it.
    if len(self._rtextwidgets) > 0:

        def drag_shift(widget, midx=midx, self=self):
            # Dropping the token left of the divider performs a shift;
            # otherwise the display is redrawn.
            if widget.bbox()[0] < midx:
                self.shift()
            else:
                self._redraw()

        self._rtextwidgets[0].bind_drag(drag_shift)
        self._rtextwidgets[0].bind_click(self.shift)

    # Draw the stack top.
    # (This call re-selects the reducible productions in the grammar listbox.)
    self._highlight_productions()
|
||||
|
||||
def _draw_stack_top(self, widget):
|
||||
# hack..
|
||||
midx = widget.bbox()[2] + 50
|
||||
self._canvas.coords(self._stacktop, midx, 0, midx, 5000)
|
||||
|
||||
def _highlight_productions(self):
|
||||
# Highlight the productions that can be reduced.
|
||||
self._prodlist.selection_clear(0, "end")
|
||||
for prod in self._parser.reducible_productions():
|
||||
index = self._productions.index(prod)
|
||||
self._prodlist.selection_set(index)
|
||||
|
||||
#########################################
|
||||
## Button Callbacks
|
||||
#########################################
|
||||
|
||||
def destroy(self, *e):
    """
    Close the application window.  Safe to call more than once: after
    the first call ``self._top`` is ``None`` and later calls do nothing.
    """
    window = self._top
    if window is not None:
        window.destroy()
        self._top = None
|
||||
|
||||
def reset(self, *e):
    """
    Re-initialize the parser on the current sentence, report the reset
    in the feedback labels, and redraw the display.
    """
    self._parser.initialize(self._sent)
    for label, caption in ((self._lastoper1, "Reset App"), (self._lastoper2, "")):
        label["text"] = caption
    self._redraw()
|
||||
|
||||
def step(self, *e):
    """
    Perform one parser step: try a reduce first, then a shift.

    Returns ``True`` if either operation was applied.  Otherwise the
    feedback labels report "Finished:" with "Success" or "Failure"
    (depending on whether the parser yields any parse) and nothing is
    returned.
    """
    if self.reduce() or self.shift():
        return True

    # Neither operation applies any more: report the final outcome.
    outcome = "Success" if list(self._parser.parses()) else "Failure"
    self._lastoper1["text"] = "Finished:"
    self._lastoper2["text"] = outcome
|
||||
|
||||
def shift(self, *e):
    """
    Ask the parser to shift the next token onto the stack.

    Does nothing (and returns ``None``) while an animation holds the
    lock.  Returns ``True`` when a token was shifted, after updating
    the feedback labels and either animating or redrawing; returns
    ``False`` when the parser could not shift.
    """
    if self._animating_lock:
        return
    if not self._parser.shift():
        return False

    shifted = self._parser.stack()[-1]
    self._lastoper1["text"] = "Shift:"
    self._lastoper2["text"] = "%r" % shifted
    refresh = self._animate_shift if self._animate.get() else self._redraw
    refresh()
    return True
|
||||
|
||||
def reduce(self, *e):
    """
    Ask the parser to perform a reduction.

    Does nothing (and returns ``None``) while an animation holds the
    lock.  Otherwise returns whatever ``self._parser.reduce()``
    returned; when that value is truthy the feedback labels are
    updated and the display is animated or redrawn.
    """
    if self._animating_lock:
        return
    production = self._parser.reduce()
    if not production:
        return production

    self._lastoper1["text"] = "Reduce:"
    self._lastoper2["text"] = "%s" % production
    refresh = self._animate_reduce if self._animate.get() else self._redraw
    refresh()
    return production
|
||||
|
||||
def undo(self, *e):
    """
    Undo the parser's last operation and redraw, unless an animation
    currently holds the lock or the parser reports nothing was undone.
    """
    if not self._animating_lock and self._parser.undo():
        self._redraw()
|
||||
|
||||
def postscript(self, *e):
    """Delegate to the canvas frame's ``print_to_file()``."""
    canvas_frame = self._cframe
    canvas_frame.print_to_file()
|
||||
|
||||
def mainloop(self, *args, **kwargs):
    """
    Enter the Tkinter mainloop.  This function must be called if
    this demo is created from a non-interactive program (e.g.
    from a script); otherwise, the demo will close as soon as
    the script completes.

    Nothing is done when ``in_idle()`` returns a true value.
    """
    if not in_idle():
        self._top.mainloop(*args, **kwargs)
|
||||
|
||||
#########################################
|
||||
## Menubar callbacks
|
||||
#########################################
|
||||
|
||||
def resize(self, size=None):
    """
    Apply a font size to the application's three shared fonts and
    redraw.

    :param size: new size to store in ``self._size``; when ``None``
        the value already stored there is used.

    The fonts are configured with the negated absolute value of the
    size.
    """
    if size is not None:
        self._size.set(size)
    magnitude = abs(self._size.get())
    for font in (self._font, self._boldfont, self._sysfont):
        font.configure(size=-magnitude)
    self._redraw()
|
||||
|
||||
def help(self, *e):
    """
    Show the module docstring in a help window.

    The window is first requested with the 'fixed' font (the default
    font is not very legible); if that attempt raises, it is retried
    with the default font.
    """
    title = "Help: Shift-Reduce Parser Application"
    text = (__doc__ or "").strip()
    try:
        ShowText(self._top, title, text, width=75, font="fixed")
    except Exception:
        # Only ordinary errors trigger the fallback; KeyboardInterrupt
        # and SystemExit are allowed to propagate.
        ShowText(self._top, title, text, width=75)
|
||||
|
||||
def about(self, *e):
    """
    Show the "About" information, preferring a Tk message box and
    falling back to a plain ``ShowText`` window if that fails.
    """
    ABOUT = "NLTK Shift-Reduce Parser Application\n" + "Written by Edward Loper"
    TITLE = "About: Shift-Reduce Parser Application"
    try:
        from tkinter.messagebox import Message

        Message(message=ABOUT, title=TITLE).show()
    except Exception:
        # Only ordinary errors trigger the fallback; KeyboardInterrupt
        # and SystemExit are allowed to propagate.
        ShowText(self._top, TITLE, ABOUT)
|
||||
|
||||
def edit_grammar(self, *e):
    """
    Open a ``CFGEditor`` on the parser's current grammar, with
    ``self.set_grammar`` passed as the editor's callback.
    """
    window = self._top
    current_grammar = self._parser.grammar()
    CFGEditor(window, current_grammar, self.set_grammar)
|
||||
|
||||
def set_grammar(self, grammar):
    """
    Install ``grammar`` in the parser and repopulate the grammar
    listbox with one line per production.
    """
    self._parser.set_grammar(grammar)
    self._productions = list(grammar.productions())

    listbox = self._prodlist
    listbox.delete(0, "end")
    for entry in (" %s" % production for production in self._productions):
        listbox.insert("end", entry)
|
||||
|
||||
def edit_sentence(self, *e):
    """
    Open an ``EntryDialog`` pre-filled with the current sentence
    (tokens joined by spaces), with ``self.set_sentence`` passed as the
    dialog's callback.
    """
    current_text = " ".join(self._sent)
    EntryDialog(
        self._top,
        current_text,
        "Enter a new sentence to parse.",
        self.set_sentence,
        "Edit Text",
    )
|
||||
|
||||
def set_sentence(self, sent):
    """
    Replace the sentence being parsed with the whitespace-separated
    tokens of ``sent`` and reset the parser.
    """
    tokens = sent.split()  # [XX] use tagged?
    self._sent = tokens
    self.reset()
|
||||
|
||||
#########################################
|
||||
## Reduce Production Selection
|
||||
#########################################
|
||||
|
||||
def _toggle_grammar(self, *e):
|
||||
if self._show_grammar.get():
|
||||
self._prodframe.pack(
|
||||
fill="both", side="left", padx=2, after=self._feedbackframe
|
||||
)
|
||||
self._lastoper1["text"] = "Show Grammar"
|
||||
else:
|
||||
self._prodframe.pack_forget()
|
||||
self._lastoper1["text"] = "Hide Grammar"
|
||||
self._lastoper2["text"] = ""
|
||||
|
||||
def _prodlist_select(self, event):
|
||||
selection = self._prodlist.curselection()
|
||||
if len(selection) != 1:
|
||||
return
|
||||
index = int(selection[0])
|
||||
production = self._parser.reduce(self._productions[index])
|
||||
if production:
|
||||
self._lastoper1["text"] = "Reduce:"
|
||||
self._lastoper2["text"] = "%s" % production
|
||||
if self._animate.get():
|
||||
self._animate_reduce()
|
||||
else:
|
||||
self._redraw()
|
||||
else:
|
||||
# Reset the production selections.
|
||||
self._prodlist.selection_clear(0, "end")
|
||||
for prod in self._parser.reducible_productions():
|
||||
index = self._productions.index(prod)
|
||||
self._prodlist.selection_set(index)
|
||||
|
||||
def _popup_reduce(self, widget):
|
||||
# Remove old commands.
|
||||
productions = self._parser.reducible_productions()
|
||||
if len(productions) == 0:
|
||||
return
|
||||
|
||||
self._reduce_menu.delete(0, "end")
|
||||
for production in productions:
|
||||
self._reduce_menu.add_command(label=str(production), command=self.reduce)
|
||||
self._reduce_menu.post(
|
||||
self._canvas.winfo_pointerx(), self._canvas.winfo_pointery()
|
||||
)
|
||||
|
||||
#########################################
|
||||
## Animations
|
||||
#########################################
|
||||
|
||||
def _animate_shift(self):
|
||||
# What widget are we shifting?
|
||||
widget = self._rtextwidgets[0]
|
||||
|
||||
# Where are we shifting from & to?
|
||||
right = widget.bbox()[0]
|
||||
if len(self._stackwidgets) == 0:
|
||||
left = 5
|
||||
else:
|
||||
left = self._stackwidgets[-1].bbox()[2] + 10
|
||||
|
||||
# Start animating.
|
||||
dt = self._animate.get()
|
||||
dx = (left - right) * 1.0 / dt
|
||||
self._animate_shift_frame(dt, widget, dx)
|
||||
|
||||
def _animate_shift_frame(self, frame, widget, dx):
|
||||
if frame > 0:
|
||||
self._animating_lock = 1
|
||||
widget.move(dx, 0)
|
||||
self._top.after(10, self._animate_shift_frame, frame - 1, widget, dx)
|
||||
else:
|
||||
# but: stacktop??
|
||||
|
||||
# Shift the widget to the stack.
|
||||
del self._rtextwidgets[0]
|
||||
self._stackwidgets.append(widget)
|
||||
self._animating_lock = 0
|
||||
|
||||
# Display the available productions.
|
||||
self._draw_stack_top(widget)
|
||||
self._highlight_productions()
|
||||
|
||||
def _animate_reduce(self):
    """
    Start the reduce animation: the stack widgets that correspond to
    the children of the newly built tree are slid downwards to make
    room for the new parent label.
    """
    # The widgets to move: as many as the new top-of-stack tree has
    # children.
    child_count = len(self._parser.stack()[-1])
    widgets = self._stackwidgets[-child_count:]

    # Distance to travel: 15 plus the height of the first widget (or of
    # its label, for a tree segment).
    first = widgets[0]
    reference = first.label() if isinstance(first, TreeSegmentWidget) else first
    ydist = 15 + reference.height()

    # Start animating (half as many frames, so twice the step).
    frames = self._animate.get()
    step = ydist * 2.0 / frames
    self._animate_reduce_frame(frames / 2, widgets, step)
|
||||
|
||||
def _animate_reduce_frame(self, frame, widgets, dy):
    """
    Run one frame of the reduce animation.

    While ``frame`` is positive each widget is moved down by ``dy``
    and the next frame is scheduled 10 ms later with the animation
    lock held.  On the final call the moved widgets are combined
    under a new label into a single tree-segment widget, which
    replaces them on the stack display; then the stack-top divider
    and production highlighting are updated and the lock is released.

    :raise ValueError: if the top of the parser's stack is not a tree.
    """
    if frame > 0:
        self._animating_lock = 1
        for moving in widgets:
            moving.move(0, dy)
        self._top.after(10, self._animate_reduce_frame, frame - 1, widgets, dy)
        return

    # Detach the child widgets from the stack display.
    del self._stackwidgets[-len(widgets) :]
    for child in widgets:
        self._cframe.remove_widget(child)

    # Wrap them in a tree segment labelled with the new node.
    tok = self._parser.stack()[-1]
    if not isinstance(tok, Tree):
        raise ValueError()
    label = TextWidget(
        self._canvas, str(tok.label()), color="#006060", font=self._boldfont
    )
    segment = TreeSegmentWidget(self._canvas, label, widgets, width=2)

    # Place the segment after the last remaining stack widget.
    (x1, y1, x2, y2) = self._stacklabel.bbox()
    y = y2 - y1 + 10
    x = self._stackwidgets[-1].bbox()[2] + 10 if self._stackwidgets else 5
    self._cframe.add_widget(segment, x, y)
    self._stackwidgets.append(segment)

    # Display the available productions.
    self._draw_stack_top(segment)
    self._highlight_productions()

    self._animating_lock = 0
|
||||
|
||||
#########################################
|
||||
## Hovering.
|
||||
#########################################
|
||||
|
||||
def _highlight_hover(self, event):
    """
    Mouse-motion handler for the grammar listbox.  When the pointer
    moves onto a production that is currently selected (i.e.
    reducible), colour the stack widgets matching its right-hand side
    green.
    """
    # What production are we hovering over?
    hovered = self._prodlist.nearest(event.y)
    if self._hover == hovered:
        return

    # Clear any previous hover highlighting.
    self._clear_hover()

    # If the production corresponds to an available reduction,
    # highlight the stack.
    selected = [int(entry) for entry in self._prodlist.curselection()]
    if hovered in selected:
        rhs_length = len(self._productions[hovered].rhs())
        for stackwidget in self._stackwidgets[-rhs_length:]:
            is_tree = isinstance(stackwidget, TreeSegmentWidget)
            target = stackwidget.label() if is_tree else stackwidget
            target["color"] = "#00a000"

    # Remember what production we're hovering over.
    self._hover = hovered
|
||||
|
||||
def _clear_hover(self, *event):
|
||||
# Clear any previous hover highlighting.
|
||||
if self._hover == -1:
|
||||
return
|
||||
self._hover = -1
|
||||
for stackwidget in self._stackwidgets:
|
||||
if isinstance(stackwidget, TreeSegmentWidget):
|
||||
stackwidget.label()["color"] = "black"
|
||||
else:
|
||||
stackwidget["color"] = "black"
|
||||
|
||||
|
||||
def app():
    """
    Launch the shift-reduce parser application on a small demonstration
    grammar and the sentence "my dog saw a man in the park with a
    statue".
    """

    from nltk.grammar import CFG, Nonterminal, Production

    S, VP, NP, PP, P, N, Name, V, Det = map(
        Nonterminal, "S VP NP PP P N Name V Det".split()
    )

    # (left-hand side, right-hand side) for the syntactic rules.
    syntactic_rules = [
        (S, [NP, VP]),
        (NP, [Det, N]),
        (NP, [NP, PP]),
        (VP, [VP, PP]),
        (VP, [V, NP, PP]),
        (VP, [V, NP]),
        (PP, [P, NP]),
    ]
    # (category, word) for the lexical rules.
    lexical_rules = [
        (NP, "I"),
        (Det, "the"),
        (Det, "a"),
        (N, "man"),
        (V, "saw"),
        (P, "in"),
        (P, "with"),
        (N, "park"),
        (N, "dog"),
        (N, "statue"),
        (Det, "my"),
    ]
    productions = tuple(
        [Production(lhs, rhs) for lhs, rhs in syntactic_rules]
        + [Production(category, [word]) for category, word in lexical_rules]
    )

    grammar = CFG(S, productions)

    # tokenize the sentence
    sent = "my dog saw a man in the park with a statue".split()

    ShiftReduceApp(grammar, sent).mainloop()
|
||||
|
||||
|
||||
# Launch the demo when this module is run as a script.
if __name__ == "__main__":
    app()

# ``app`` is the only name exported by a star-import of this module.
__all__ = ["app"]
|
||||
@@ -0,0 +1,36 @@
|
||||
# Natural Language Toolkit: Wordfreq Application
|
||||
#
|
||||
# Copyright (C) 2001-2025 NLTK Project
|
||||
# Author: Sumukh Ghodke <sghodke@csse.unimelb.edu.au>
|
||||
# URL: <https://www.nltk.org/>
|
||||
# For license information, see LICENSE.TXT
|
||||
|
||||
from matplotlib import pylab
|
||||
|
||||
from nltk.corpus import gutenberg
|
||||
from nltk.text import Text
|
||||
|
||||
|
||||
def plot_word_freq_dist(text, num_samples=50):
    """
    Plot the cumulative percentage of all tokens in ``text`` that is
    covered by its most frequent samples.

    :param text: object providing ``vocab()`` (a frequency distribution
        with ``most_common()`` and ``N()``) and a ``name`` attribute
        used as the plot title.
    :param num_samples: how many of the most common samples to plot
        (default 50).
    """
    from itertools import accumulate

    fd = text.vocab()
    total = fd.N()

    # most_common() already yields (sample, count) pairs, so the counts
    # are taken from there and summed in a single running pass.
    top = fd.most_common(num_samples)
    samples = [item for item, _ in top]
    values = [
        running * 100.0 / total
        for running in accumulate(count for _, count in top)
    ]

    pylab.title(text.name)
    pylab.xlabel("Samples")
    pylab.ylabel("Cumulative Percentage")
    pylab.plot(values)
    pylab.xticks(range(len(samples)), [str(s) for s in samples], rotation=90)
    pylab.show()
|
||||
|
||||
|
||||
def app():
    """
    Plot the word frequency distribution of the Gutenberg corpus file
    "melville-moby_dick.txt".
    """
    moby_dick = Text(gutenberg.words("melville-moby_dick.txt"))
    plot_word_freq_dist(moby_dick)
|
||||
|
||||
|
||||
# Show the plot when this module is run as a script.
if __name__ == "__main__":
    app()

# ``app`` is the only name exported by a star-import of this module.
__all__ = ["app"]
|
||||
1006
Backend/venv/lib/python3.12/site-packages/nltk/app/wordnet_app.py
Normal file
1006
Backend/venv/lib/python3.12/site-packages/nltk/app/wordnet_app.py
Normal file
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user