This commit is contained in:
Iliyan Angelov
2025-12-01 06:50:10 +02:00
parent 91f51bc6fe
commit 62c1fe5951
4682 changed files with 544807 additions and 31208 deletions

View File

@@ -0,0 +1,47 @@
# Natural Language Toolkit: Applications package
#
# Copyright (C) 2001-2025 NLTK Project
# Author: Edward Loper <edloper@gmail.com>
# Steven Bird <stevenbird1@gmail.com>
# URL: <https://www.nltk.org/>
# For license information, see LICENSE.TXT
"""
Interactive NLTK Applications:
chartparser: Chart Parser
chunkparser: Regular-Expression Chunk Parser
collocations: Find collocations in text
concordance: Part-of-speech concordancer
nemo: Finding (and Replacing) Nemo regular expression tool
rdparser: Recursive Descent Parser
srparser: Shift-Reduce Parser
wordnet: WordNet Browser
"""
# The applications are only exposed when Tkinter can be imported; without
# it a warning is issued and none of the ``app`` entry points is bound.
try:
    import tkinter
except ImportError:
    import warnings

    warnings.warn("nltk.app package not loaded (please install Tkinter library).")
else:
    from nltk.app.chartparser_app import app as chartparser
    from nltk.app.chunkparser_app import app as chunkparser
    from nltk.app.collocations_app import app as collocations
    from nltk.app.concordance_app import app as concordance
    from nltk.app.nemo_app import app as nemo
    from nltk.app.rdparser_app import app as rdparser
    from nltk.app.srparser_app import app as srparser
    from nltk.app.wordnet_app import app as wordnet

    # The word-frequency application is skipped (with its own warning) when
    # matplotlib cannot be imported.
    try:
        from matplotlib import pylab
    except ImportError:
        import warnings

        warnings.warn("nltk.app.wordfreq not loaded (requires the matplotlib library).")
    else:
        from nltk.app.wordfreq_app import app as wordfreq

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,438 @@
# Natural Language Toolkit: Collocations Application
# Much of the GUI code is imported from concordance.py; we intend to merge these tools together.
# Copyright (C) 2001-2025 NLTK Project
# Author: Sumukh Ghodke <sghodke@csse.unimelb.edu.au>
# URL: <https://www.nltk.org/>
# For license information, see LICENSE.TXT
#
import queue as q
import threading
from tkinter import (
END,
LEFT,
SUNKEN,
Button,
Frame,
IntVar,
Label,
Menu,
OptionMenu,
Scrollbar,
StringVar,
Text,
Tk,
)
from tkinter.font import Font
from nltk.corpus import (
alpino,
brown,
cess_cat,
cess_esp,
floresta,
indian,
mac_morpho,
machado,
nps_chat,
sinica_treebank,
treebank,
)
from nltk.probability import FreqDist
from nltk.util import in_idle
# Event names the corpus-loading thread puts on the queue shared with the view.
CORPUS_LOADED_EVENT = "<<CL_EVENT>>"
ERROR_LOADING_CORPUS_EVENT = "<<ELC_EVENT>>"

# Delay handed to Tk's ``after`` between two polls of that queue.
POLL_INTERVAL = 100


def _brown_words(categories):
    """Return a zero-argument loader for the Brown corpus words of *categories*."""
    return lambda: brown.words(categories=categories)


# Menu label -> zero-argument callable returning the words of that corpus.
# Every value is a callable, so no corpus reader is invoked until the entry
# is actually selected.
_DEFAULT = "English: Brown Corpus (Humor)"
_CORPORA = {
    "Catalan: CESS-CAT Corpus": lambda: cess_cat.words(),
    "English: Brown Corpus": lambda: brown.words(),
    "English: Brown Corpus (Press)": _brown_words(["news", "editorial", "reviews"]),
    "English: Brown Corpus (Religion)": _brown_words("religion"),
    "English: Brown Corpus (Learned)": _brown_words("learned"),
    "English: Brown Corpus (Science Fiction)": _brown_words("science_fiction"),
    "English: Brown Corpus (Romance)": _brown_words("romance"),
    "English: Brown Corpus (Humor)": _brown_words("humor"),
    "English: NPS Chat Corpus": lambda: nps_chat.words(),
    "English: Wall Street Journal Corpus": lambda: treebank.words(),
    "Chinese: Sinica Corpus": lambda: sinica_treebank.words(),
    "Dutch: Alpino Corpus": lambda: alpino.words(),
    "Hindi: Indian Languages Corpus": lambda: indian.words(files="hindi.pos"),
    "Portuguese: Floresta Corpus (Portugal)": lambda: floresta.words(),
    "Portuguese: MAC-MORPHO Corpus (Brazil)": lambda: mac_morpho.words(),
    "Portuguese: Machado Corpus (Brazil)": lambda: machado.words(),
    "Spanish: CESS-ESP Corpus": lambda: cess_esp.words(),
}
class CollocationsView:
    """Tk window that pages through the collocations of a selectable corpus.

    The view owns a ``CollocationsModel`` and a queue shared with it.  The
    model reports the outcome of a corpus load by putting an event name on
    that queue; ``_poll`` reads it from a Tk ``after`` callback, so every
    handler that touches widgets runs inside the Tk event loop.
    """

    _BACKGROUND_COLOUR = "#FFF"  # white
    # Page sizes offered by the "Result Count" menu and the menu index that
    # is invoked at start-up (index 1 selects 50 results per page).
    _RESULT_SIZES = (20, 50, 100)
    _INITIAL_RESULT_SIZE_INDEX = 1

    def __init__(self):
        """Build the window, start loading the default corpus and begin polling."""
        self.queue = q.Queue()
        self.model = CollocationsModel(self.queue)
        self.top = Tk()
        self._init_top(self.top)
        self._init_menubar()
        self._init_widgets(self.top)
        self.load_corpus(self.model.DEFAULT_CORPUS)
        # Identifier of the pending ``after`` call; destroy() cancels it.
        self.after = self.top.after(POLL_INTERVAL, self._poll)

    def _init_top(self, top):
        """Set geometry, title and the quit/close bindings of the top-level window."""
        top.geometry("550x650+50+50")
        top.title("NLTK Collocations List")
        top.bind("<Control-q>", self.destroy)
        top.protocol("WM_DELETE_WINDOW", self.destroy)
        top.minsize(550, 650)

    def _init_widgets(self, parent):
        """Create the main frame and populate it top to bottom."""
        self.main_frame = Frame(
            parent, dict(background=self._BACKGROUND_COLOUR, padx=1, pady=1, border=1)
        )
        self._init_corpus_select(self.main_frame)
        self._init_results_box(self.main_frame)
        self._init_paging(self.main_frame)
        self._init_status(self.main_frame)
        self.main_frame.pack(fill="both", expand=True)

    def _init_corpus_select(self, parent):
        """Create the "Corpus:" label and the drop-down listing every corpus."""
        innerframe = Frame(parent, background=self._BACKGROUND_COLOUR)
        self.var = StringVar(innerframe)
        self.var.set(self.model.DEFAULT_CORPUS)
        Label(
            innerframe,
            justify=LEFT,
            text=" Corpus: ",
            background=self._BACKGROUND_COLOUR,
            padx=2,
            pady=1,
            border=0,
        ).pack(side="left")
        # The default corpus comes first, followed by the others sorted by name.
        om = OptionMenu(
            innerframe,
            self.var,
            self.model.DEFAULT_CORPUS,
            *self.model.non_default_corpora(),
            command=self.corpus_selected,
        )
        om["borderwidth"] = 0
        om["highlightthickness"] = 1
        om.pack(side="left")
        innerframe.pack(side="top", fill="x", anchor="n")

    def _init_status(self, parent):
        """Create the status label shown below the paging buttons."""
        self.status = Label(
            parent,
            justify=LEFT,
            relief=SUNKEN,
            background=self._BACKGROUND_COLOUR,
            border=0,
            padx=1,
            pady=0,
        )
        self.status.pack(side="top", anchor="sw")

    def _init_menubar(self):
        """Create the File and Edit menus and select the initial page size."""
        self._result_size = IntVar(self.top)
        menubar = Menu(self.top)

        filemenu = Menu(menubar, tearoff=0, borderwidth=0)
        filemenu.add_command(
            label="Exit", underline=1, command=self.destroy, accelerator="Ctrl-q"
        )
        menubar.add_cascade(label="File", underline=0, menu=filemenu)

        editmenu = Menu(menubar, tearoff=0)
        rescntmenu = Menu(editmenu, tearoff=0)
        for size in self._RESULT_SIZES:
            rescntmenu.add_radiobutton(
                label=str(size),
                variable=self._result_size,
                underline=0,
                value=size,
                command=self.set_result_size,
            )
        # Invoking the entry both selects it and runs set_result_size(), which
        # is what gives the model its initial result_count.
        rescntmenu.invoke(self._INITIAL_RESULT_SIZE_INDEX)
        editmenu.add_cascade(label="Result Count", underline=0, menu=rescntmenu)
        menubar.add_cascade(label="Edit", underline=0, menu=editmenu)
        self.top.config(menu=menubar)

    def set_result_size(self, **kwargs):
        """Copy the page size chosen in the menu to the model."""
        self.model.result_count = self._result_size.get()

    def _init_results_box(self, parent):
        """Create the read-only text box for the results and its two scrollbars."""
        innerframe = Frame(parent)
        i1 = Frame(innerframe)
        i2 = Frame(innerframe)
        vscrollbar = Scrollbar(i1, borderwidth=1)
        hscrollbar = Scrollbar(i2, borderwidth=1, orient="horiz")
        self.results_box = Text(
            i1,
            font=Font(family="courier", size="16"),
            state="disabled",
            borderwidth=1,
            yscrollcommand=vscrollbar.set,
            xscrollcommand=hscrollbar.set,
            wrap="none",
            width="40",
            height="20",
            exportselection=1,
        )
        self.results_box.pack(side="left", fill="both", expand=True)
        vscrollbar.pack(side="left", fill="y", anchor="e")
        vscrollbar.config(command=self.results_box.yview)
        hscrollbar.pack(side="left", fill="x", expand=True, anchor="w")
        hscrollbar.config(command=self.results_box.xview)
        # there is no other way of avoiding the overlap of scrollbars while using pack layout manager!!!
        Label(i2, text=" ", background=self._BACKGROUND_COLOUR).pack(
            side="left", anchor="e"
        )
        i1.pack(side="top", fill="both", expand=True, anchor="n")
        i2.pack(side="bottom", fill="x", anchor="s")
        innerframe.pack(side="top", fill="both", expand=True)

    def _init_paging(self, parent):
        """Create the Previous/Next buttons (initially disabled)."""
        innerframe = Frame(parent, background=self._BACKGROUND_COLOUR)
        self.prev = Button(
            innerframe,
            text="Previous",
            command=self.previous,
            width="10",
            borderwidth=1,
            highlightthickness=1,
            state="disabled",
        )
        self.prev.pack(side="left", anchor="center")
        self.next = Button(
            innerframe,
            text="Next",
            command=self.__next__,
            width="10",
            borderwidth=1,
            highlightthickness=1,
            state="disabled",
        )
        self.next.pack(side="right", anchor="center")
        innerframe.pack(side="top", fill="y")
        self.reset_current_page()

    def reset_current_page(self):
        """Mark that no page is displayed (page numbers start at 0)."""
        self.current_page = -1

    def _poll(self):
        """Handle at most one queued model event, then schedule the next poll."""
        try:
            event = self.queue.get(block=False)
        except q.Empty:
            pass
        else:
            if event == CORPUS_LOADED_EVENT:
                self.handle_corpus_loaded(event)
            elif event == ERROR_LOADING_CORPUS_EVENT:
                self.handle_error_loading_corpus(event)
        finally:
            # Reschedule even when a handler raised; otherwise a single
            # failure would silently stop all further event delivery.
            self.after = self.top.after(POLL_INTERVAL, self._poll)

    def handle_error_loading_corpus(self, event):
        """Report a failed load and leave the window empty with paging disabled."""
        self.status["text"] = "Error in loading " + self.var.get()
        # The page number of the previous corpus is meaningless now; it must
        # not be used to compute button states (the model has no collocations
        # to compare it against after a failed load).
        self.reset_current_page()
        self.clear_results_box()
        self.freeze_editable()

    def handle_corpus_loaded(self, event):
        """Show the first page of the freshly loaded corpus."""
        self.status["text"] = self.var.get() + " is loaded"
        self.clear_results_box()
        self.reset_current_page()
        collocations = self.model.next(self.current_page + 1)
        self.write_results(collocations)
        self.current_page += 1
        # Enable the buttons only now, so that their state reflects page 0 of
        # the new corpus rather than the page shown for the previous one.
        self.unfreeze_editable()

    def corpus_selected(self, *args):
        """Callback of the corpus drop-down: load whatever is selected now."""
        new_selection = self.var.get()
        self.load_corpus(new_selection)

    def previous(self):
        """Show the page before the current one."""
        self.freeze_editable()
        collocations = self.model.prev(self.current_page - 1)
        self.current_page = self.current_page - 1
        self.clear_results_box()
        self.write_results(collocations)
        self.unfreeze_editable()

    def __next__(self):
        """Show the page after the current one."""
        self.freeze_editable()
        collocations = self.model.next(self.current_page + 1)
        self.clear_results_box()
        self.write_results(collocations)
        self.current_page += 1
        self.unfreeze_editable()

    def load_corpus(self, selection):
        """Ask the model to load *selection* unless it is already selected."""
        if self.model.selected_corpus != selection:
            self.status["text"] = "Loading " + selection + "..."
            self.freeze_editable()
            self.model.load_corpus(selection)

    def freeze_editable(self):
        """Disable both paging buttons."""
        self.prev["state"] = "disabled"
        self.next["state"] = "disabled"

    def clear_results_box(self):
        """Remove all text from the results box."""
        # The box is kept read-only, so it has to be unlocked for the edit.
        self.results_box["state"] = "normal"
        self.results_box.delete("1.0", END)
        self.results_box["state"] = "disabled"

    def fire_event(self, event):
        """Generate the Tk virtual event *event* on the top-level window."""
        # Firing an event so that rendering of widgets happen in the mainloop thread
        self.top.event_generate(event, when="tail")

    def destroy(self, *e):
        """Cancel the pending poll and close the window; safe to call twice."""
        if self.top is None:
            return
        self.top.after_cancel(self.after)
        self.top.destroy()
        self.top = None

    def mainloop(self, *args, **kwargs):
        """Run the Tk main loop, unless running under IDLE."""
        if in_idle():
            return
        self.top.mainloop(*args, **kwargs)

    def unfreeze_editable(self):
        """Re-enable whichever paging buttons apply to the current page."""
        self.set_paging_button_states()

    def set_paging_button_states(self):
        """Enable Previous/Next according to the current page and the model."""
        on_first_page = self.current_page in (-1, 0)
        self.prev["state"] = "disabled" if on_first_page else "normal"
        on_last_page = self.model.is_last_page(self.current_page)
        self.next["state"] = "disabled" if on_last_page else "normal"

    def write_results(self, results):
        """Write one "word1 word2" line per collocation into the results box."""
        self.results_box["state"] = "normal"
        for row, pair in enumerate(results, start=1):
            self.results_box.insert(str(row) + ".0", pair[0] + " " + pair[1] + "\n")
        self.results_box["state"] = "disabled"
class CollocationsModel:
    """Ranked collocations of the selected corpus, handed out page by page.

    ``collocations`` is ``None`` until the worker thread started by
    ``load_corpus`` has finished; the thread announces completion (or
    failure) by putting an event name on ``queue``.
    """

    def __init__(self, queue):
        """Create an empty model that reports load results on *queue*."""
        self.result_count = None
        self.selected_corpus = None
        self.collocations = None
        self.CORPORA = _CORPORA
        self.DEFAULT_CORPUS = _DEFAULT
        self.queue = queue
        self.reset_results()

    def reset_results(self):
        """Forget every page handed out so far."""
        self.result_pages = []
        self.results_returned = 0

    def load_corpus(self, name):
        """Select corpus *name* and compute its collocations on a worker thread."""
        self.selected_corpus = name
        self.collocations = None
        # Clear the paging state before the worker starts, so the worker
        # never runs against pages that belong to the previous corpus.
        self.reset_results()
        self.LoadCorpus(name, self).start()

    def non_default_corpora(self):
        """Return the names of all corpora except the default one, sorted."""
        return sorted(name for name in self.CORPORA if name != self.DEFAULT_CORPUS)

    def is_last_page(self, number):
        """Return True if no collocations remain after page *number*.

        While no corpus is loaded there is nothing to page through, so every
        page counts as the last one.
        """
        if self.collocations is None:
            return True
        last_loaded = len(self.result_pages) - 1
        if number < last_loaded:
            # A later page has already been handed out.
            return False
        # results_returned is the offset just past the last loaded page; pages
        # between it and *number* would each consume result_count entries.
        consumed = self.results_returned + (number - last_loaded) * self.result_count
        return consumed >= len(self.collocations)

    def next(self, page):
        """Return page *page*, slicing any pages up to it that are still missing."""
        missing = page - (len(self.result_pages) - 1)
        for _ in range(missing):
            start = self.results_returned
            self.result_pages.append(
                self.collocations[start : start + self.result_count]
            )
            self.results_returned += self.result_count
        return self.result_pages[page]

    def prev(self, page):
        """Return the already loaded page *page*; page -1 is the empty page."""
        if page == -1:
            return []
        return self.result_pages[page]

    class LoadCorpus(threading.Thread):
        """Worker that reads a corpus and ranks its bigrams for the model."""

        def __init__(self, name, model):
            threading.Thread.__init__(self)
            self.model, self.name = model, name

        def run(self):
            """Compute the ranking and post the outcome on the model's queue."""
            try:
                words = self.model.CORPORA[self.name]()
                # Words of one or two characters are ignored entirely.
                text = [w for w in words if len(w) > 2]
                fd = FreqDist(zip(text, text[1:]))
                vocab = FreqDist(text)

                def score(pair):
                    # Cubed bigram count over the product of the word counts.
                    return fd[pair] ** 3 / (vocab[pair[0]] * vocab[pair[1]])

                self.model.collocations = sorted(fd, key=score, reverse=True)
                self.model.queue.put(CORPUS_LOADED_EVENT)
            except Exception as e:
                # Any failure (e.g. in the corpus reader) is reported to the
                # view instead of killing the thread silently.
                print(e)
                self.model.queue.put(ERROR_LOADING_CORPUS_EVENT)
# def collocations():
# colloc_strings = [w1 + ' ' + w2 for w1, w2 in self._collocations[:num]]
def app():
    """Open the collocations window and run its event loop."""
    view = CollocationsView()
    view.mainloop()


if __name__ == "__main__":
    app()

__all__ = ["app"]

View File

@@ -0,0 +1,709 @@
# Natural Language Toolkit: Concordance Application
#
# Copyright (C) 2001-2025 NLTK Project
# Author: Sumukh Ghodke <sghodke@csse.unimelb.edu.au>
# URL: <https://www.nltk.org/>
# For license information, see LICENSE.TXT
import queue as q
import re
import threading
from tkinter import (
END,
LEFT,
SUNKEN,
Button,
Entry,
Frame,
IntVar,
Label,
Menu,
OptionMenu,
Scrollbar,
StringVar,
Text,
Tk,
)
from tkinter.font import Font
from nltk.corpus import (
alpino,
brown,
cess_cat,
cess_esp,
floresta,
indian,
mac_morpho,
nps_chat,
sinica_treebank,
treebank,
)
from nltk.draw.util import ShowText
from nltk.util import in_idle
# Regex fragments used to turn a user query into a pattern over
# "word/TAG word/TAG ..." strings.
WORD_OR_TAG = "[^/ ]+"
BOUNDARY = r"\b"

# Event names the worker threads put on the queue shared with the view.
CORPUS_LOADED_EVENT = "<<CL_EVENT>>"
SEARCH_TERMINATED_EVENT = "<<ST_EVENT>>"
SEARCH_ERROR_EVENT = "<<SE_EVENT>>"
ERROR_LOADING_CORPUS_EVENT = "<<ELC_EVENT>>"

# Delay handed to Tk's ``after`` between two polls of that queue.
POLL_INTERVAL = 50


def _brown_simplified(categories):
    """Return a zero-argument loader for universal-tagged Brown sentences of *categories*."""
    return lambda: brown.tagged_sents(categories=categories, tagset="universal")


# NB All corpora must be specified in a lambda expression so as not to be
# loaded when the module is imported.
_DEFAULT = "English: Brown Corpus (Humor, simplified)"
_CORPORA = {
    "Catalan: CESS-CAT Corpus (simplified)": lambda: cess_cat.tagged_sents(
        tagset="universal"
    ),
    "English: Brown Corpus": lambda: brown.tagged_sents(),
    "English: Brown Corpus (simplified)": lambda: brown.tagged_sents(
        tagset="universal"
    ),
    "English: Brown Corpus (Press, simplified)": _brown_simplified(
        ["news", "editorial", "reviews"]
    ),
    "English: Brown Corpus (Religion, simplified)": _brown_simplified("religion"),
    "English: Brown Corpus (Learned, simplified)": _brown_simplified("learned"),
    "English: Brown Corpus (Science Fiction, simplified)": _brown_simplified(
        "science_fiction"
    ),
    "English: Brown Corpus (Romance, simplified)": _brown_simplified("romance"),
    "English: Brown Corpus (Humor, simplified)": _brown_simplified("humor"),
    "English: NPS Chat Corpus": lambda: nps_chat.tagged_posts(),
    "English: NPS Chat Corpus (simplified)": lambda: nps_chat.tagged_posts(
        tagset="universal"
    ),
    "English: Wall Street Journal Corpus": lambda: treebank.tagged_sents(),
    "English: Wall Street Journal Corpus (simplified)": lambda: treebank.tagged_sents(
        tagset="universal"
    ),
    "Chinese: Sinica Corpus": lambda: sinica_treebank.tagged_sents(),
    "Chinese: Sinica Corpus (simplified)": lambda: sinica_treebank.tagged_sents(
        tagset="universal"
    ),
    "Dutch: Alpino Corpus": lambda: alpino.tagged_sents(),
    "Dutch: Alpino Corpus (simplified)": lambda: alpino.tagged_sents(
        tagset="universal"
    ),
    "Hindi: Indian Languages Corpus": lambda: indian.tagged_sents(files="hindi.pos"),
    "Hindi: Indian Languages Corpus (simplified)": lambda: indian.tagged_sents(
        files="hindi.pos", tagset="universal"
    ),
    "Portuguese: Floresta Corpus (Portugal)": lambda: floresta.tagged_sents(),
    "Portuguese: Floresta Corpus (Portugal, simplified)": lambda: floresta.tagged_sents(
        tagset="universal"
    ),
    "Portuguese: MAC-MORPHO Corpus (Brazil)": lambda: mac_morpho.tagged_sents(),
    "Portuguese: MAC-MORPHO Corpus (Brazil, simplified)": lambda: mac_morpho.tagged_sents(
        tagset="universal"
    ),
    "Spanish: CESS-ESP Corpus (simplified)": lambda: cess_esp.tagged_sents(
        tagset="universal"
    ),
}
class ConcordanceSearchView:
    """Tk window for searching a tagged corpus and paging through the hits.

    The view owns a ``ConcordanceSearchModel`` and a queue shared with it.
    The model reports finished loads and searches by putting event names on
    that queue; ``_poll`` reads it from a Tk ``after`` callback, so every
    handler that touches widgets runs inside the Tk event loop.
    """

    _BACKGROUND_COLOUR = "#FFF"  # white
    # Colour of highlighted results
    _HIGHLIGHT_WORD_COLOUR = "#F00"  # red
    _HIGHLIGHT_WORD_TAG = "HL_WRD_TAG"
    _HIGHLIGHT_LABEL_COLOUR = "#C0C0C0"  # dark grey
    _HIGHLIGHT_LABEL_TAG = "HL_LBL_TAG"
    # Percentage of text left of the scrollbar position
    _FRACTION_LEFT_TEXT = 0.30
    # Choices offered by the Edit menu; in each group the second entry
    # (menu index 1) is the one invoked at start-up.
    _RESULT_SIZES = (20, 50, 100)
    _CONTEXT_BEFORE_SIZES = (60, 80, 100)
    _CONTEXT_AFTER_SIZES = (70, 90, 110)

    def __init__(self):
        """Build the window, start loading the default corpus and begin polling."""
        self.queue = q.Queue()
        self.model = ConcordanceSearchModel(self.queue)
        self.top = Tk()
        self._init_top(self.top)
        self._init_menubar()
        self._init_widgets(self.top)
        self.load_corpus(self.model.DEFAULT_CORPUS)
        # Identifier of the pending ``after`` call; destroy() cancels it.
        self.after = self.top.after(POLL_INTERVAL, self._poll)

    def _init_top(self, top):
        """Set geometry, title and the quit/close bindings of the top-level window."""
        top.geometry("950x680+50+50")
        top.title("NLTK Concordance Search")
        top.bind("<Control-q>", self.destroy)
        top.protocol("WM_DELETE_WINDOW", self.destroy)
        top.minsize(950, 680)

    def _init_widgets(self, parent):
        """Create the main frame and populate it top to bottom."""
        self.main_frame = Frame(
            parent, dict(background=self._BACKGROUND_COLOUR, padx=1, pady=1, border=1)
        )
        self._init_corpus_select(self.main_frame)
        self._init_query_box(self.main_frame)
        self._init_results_box(self.main_frame)
        self._init_paging(self.main_frame)
        self._init_status(self.main_frame)
        self.main_frame.pack(fill="both", expand=True)

    @staticmethod
    def _add_radio_choices(menu, variable, values, command, suffix=""):
        """Add one radiobutton per value to *menu* and invoke the second one."""
        for value in values:
            menu.add_radiobutton(
                label=str(value) + suffix,
                variable=variable,
                underline=0,
                value=value,
                command=command,
            )
        # Invoking selects the entry and runs *command*, which stores the
        # initial setting on the view/model.
        menu.invoke(1)

    def _init_menubar(self):
        """Create the File and Edit menus and apply the initial settings."""
        self._result_size = IntVar(self.top)
        self._cntx_bf_len = IntVar(self.top)
        self._cntx_af_len = IntVar(self.top)
        menubar = Menu(self.top)

        filemenu = Menu(menubar, tearoff=0, borderwidth=0)
        filemenu.add_command(
            label="Exit", underline=1, command=self.destroy, accelerator="Ctrl-q"
        )
        menubar.add_cascade(label="File", underline=0, menu=filemenu)

        editmenu = Menu(menubar, tearoff=0)
        rescntmenu = Menu(editmenu, tearoff=0)
        self._add_radio_choices(
            rescntmenu, self._result_size, self._RESULT_SIZES, self.set_result_size
        )
        editmenu.add_cascade(label="Result Count", underline=0, menu=rescntmenu)

        cntxmenu = Menu(editmenu, tearoff=0)
        cntxbfmenu = Menu(cntxmenu, tearoff=0)
        self._add_radio_choices(
            cntxbfmenu,
            self._cntx_bf_len,
            self._CONTEXT_BEFORE_SIZES,
            self.set_cntx_bf_len,
            " characters",
        )
        cntxmenu.add_cascade(label="Before", underline=0, menu=cntxbfmenu)
        cntxafmenu = Menu(cntxmenu, tearoff=0)
        self._add_radio_choices(
            cntxafmenu,
            self._cntx_af_len,
            self._CONTEXT_AFTER_SIZES,
            self.set_cntx_af_len,
            " characters",
        )
        cntxmenu.add_cascade(label="After", underline=0, menu=cntxafmenu)
        editmenu.add_cascade(label="Context", underline=0, menu=cntxmenu)

        menubar.add_cascade(label="Edit", underline=0, menu=editmenu)
        self.top.config(menu=menubar)

    def set_result_size(self, **kwargs):
        """Copy the page size chosen in the menu to the model."""
        self.model.result_count = self._result_size.get()

    def set_cntx_af_len(self, **kwargs):
        """Store how many characters are displayed from the match start onwards."""
        self._char_after = self._cntx_af_len.get()

    def set_cntx_bf_len(self, **kwargs):
        """Store how many characters are displayed before the match start."""
        self._char_before = self._cntx_bf_len.get()

    def _init_corpus_select(self, parent):
        """Create the "Corpus:" label and the drop-down listing every corpus."""
        innerframe = Frame(parent, background=self._BACKGROUND_COLOUR)
        self.var = StringVar(innerframe)
        self.var.set(self.model.DEFAULT_CORPUS)
        Label(
            innerframe,
            justify=LEFT,
            text=" Corpus: ",
            background=self._BACKGROUND_COLOUR,
            padx=2,
            pady=1,
            border=0,
        ).pack(side="left")
        # The default corpus comes first, followed by the others sorted by name.
        om = OptionMenu(
            innerframe,
            self.var,
            self.model.DEFAULT_CORPUS,
            *self.model.non_default_corpora(),
            command=self.corpus_selected,
        )
        om["borderwidth"] = 0
        om["highlightthickness"] = 1
        om.pack(side="left")
        innerframe.pack(side="top", fill="x", anchor="n")

    def _init_status(self, parent):
        """Create the status label shown below the paging buttons."""
        self.status = Label(
            parent,
            justify=LEFT,
            relief=SUNKEN,
            background=self._BACKGROUND_COLOUR,
            border=0,
            padx=1,
            pady=0,
        )
        self.status.pack(side="top", anchor="sw")

    def _init_query_box(self, parent):
        """Create the query entry and the Search button; Return also searches."""
        innerframe = Frame(parent, background=self._BACKGROUND_COLOUR)
        another = Frame(innerframe, background=self._BACKGROUND_COLOUR)
        self.query_box = Entry(another, width=60)
        self.query_box.pack(side="left", fill="x", pady=25, anchor="center")
        self.search_button = Button(
            another,
            text="Search",
            command=self.search,
            borderwidth=1,
            highlightthickness=1,
        )
        self.search_button.pack(side="left", fill="x", pady=25, anchor="center")
        self.query_box.bind("<KeyPress-Return>", self.search_enter_keypress_handler)
        another.pack()
        innerframe.pack(side="top", fill="x", anchor="n")

    def search_enter_keypress_handler(self, *event):
        """Key binding wrapper around search()."""
        self.search()

    def _init_results_box(self, parent):
        """Create the read-only results box, its highlight tags and scrollbars."""
        innerframe = Frame(parent)
        i1 = Frame(innerframe)
        i2 = Frame(innerframe)
        vscrollbar = Scrollbar(i1, borderwidth=1)
        hscrollbar = Scrollbar(i2, borderwidth=1, orient="horiz")
        self.results_box = Text(
            i1,
            font=Font(family="courier", size="16"),
            state="disabled",
            borderwidth=1,
            yscrollcommand=vscrollbar.set,
            xscrollcommand=hscrollbar.set,
            wrap="none",
            width="40",
            height="20",
            exportselection=1,
        )
        self.results_box.pack(side="left", fill="both", expand=True)
        self.results_box.tag_config(
            self._HIGHLIGHT_WORD_TAG, foreground=self._HIGHLIGHT_WORD_COLOUR
        )
        self.results_box.tag_config(
            self._HIGHLIGHT_LABEL_TAG, foreground=self._HIGHLIGHT_LABEL_COLOUR
        )
        vscrollbar.pack(side="left", fill="y", anchor="e")
        vscrollbar.config(command=self.results_box.yview)
        hscrollbar.pack(side="left", fill="x", expand=True, anchor="w")
        hscrollbar.config(command=self.results_box.xview)
        # there is no other way of avoiding the overlap of scrollbars while using pack layout manager!!!
        Label(i2, text=" ", background=self._BACKGROUND_COLOUR).pack(
            side="left", anchor="e"
        )
        i1.pack(side="top", fill="both", expand=True, anchor="n")
        i2.pack(side="bottom", fill="x", anchor="s")
        innerframe.pack(side="top", fill="both", expand=True)

    def _init_paging(self, parent):
        """Create the Previous/Next buttons (initially disabled)."""
        innerframe = Frame(parent, background=self._BACKGROUND_COLOUR)
        self.prev = Button(
            innerframe,
            text="Previous",
            command=self.previous,
            width="10",
            borderwidth=1,
            highlightthickness=1,
            state="disabled",
        )
        self.prev.pack(side="left", anchor="center")
        self.next = Button(
            innerframe,
            text="Next",
            command=self.__next__,
            width="10",
            borderwidth=1,
            highlightthickness=1,
            state="disabled",
        )
        self.next.pack(side="right", anchor="center")
        innerframe.pack(side="top", fill="y")
        # Result pages are numbered from 1; 0 means that nothing is shown.
        self.current_page = 0

    def previous(self):
        """Request the page before the current one from the model."""
        self.clear_results_box()
        self.freeze_editable()
        self.model.prev(self.current_page - 1)

    def __next__(self):
        """Request the page after the current one from the model."""
        self.clear_results_box()
        self.freeze_editable()
        self.model.next(self.current_page + 1)

    def about(self, *e):
        """Show the "about" text, falling back to a plain text window."""
        ABOUT = "NLTK Concordance Search Demo\n"
        TITLE = "About: NLTK Concordance Search Demo"
        try:
            from tkinter.messagebox import Message

            Message(message=ABOUT, title=TITLE, parent=self.main_frame).show()
        except Exception:
            # Any failure of the message box is tolerated, but
            # KeyboardInterrupt/SystemExit are no longer swallowed.
            ShowText(self.top, TITLE, ABOUT)

    def _bind_event_handlers(self):
        """Bind the model events as Tk virtual events on the top-level window."""
        self.top.bind(CORPUS_LOADED_EVENT, self.handle_corpus_loaded)
        self.top.bind(SEARCH_TERMINATED_EVENT, self.handle_search_terminated)
        self.top.bind(SEARCH_ERROR_EVENT, self.handle_search_error)
        self.top.bind(ERROR_LOADING_CORPUS_EVENT, self.handle_error_loading_corpus)

    def _poll(self):
        """Handle at most one queued model event, then schedule the next poll."""
        try:
            event = self.queue.get(block=False)
        except q.Empty:
            pass
        else:
            if event == CORPUS_LOADED_EVENT:
                self.handle_corpus_loaded(event)
            elif event == SEARCH_TERMINATED_EVENT:
                self.handle_search_terminated(event)
            elif event == SEARCH_ERROR_EVENT:
                self.handle_search_error(event)
            elif event == ERROR_LOADING_CORPUS_EVENT:
                self.handle_error_loading_corpus(event)
        finally:
            # Reschedule even when a handler raised; otherwise a single
            # failure would silently stop all further event delivery.
            self.after = self.top.after(POLL_INTERVAL, self._poll)

    def _forget_previous_search(self):
        """Drop the page number and result pages of the previous corpus."""
        # Without this, the paging buttons would be enabled from stale state
        # and "Next" could show hits of the old corpus or start a search
        # without a query.
        self.current_page = 0
        self.model.reset_results()

    def handle_error_loading_corpus(self, event):
        """Report a failed load and leave every control disabled."""
        self.status["text"] = "Error in loading " + self.var.get()
        self._forget_previous_search()
        # The query box must be editable for clear_all() to empty it.
        self.unfreeze_editable()
        self.clear_all()
        self.freeze_editable()

    def handle_corpus_loaded(self, event):
        """Report a finished load and get ready for a new query."""
        self.status["text"] = self.var.get() + " is loaded"
        self._forget_previous_search()
        self.unfreeze_editable()
        self.clear_all()
        self.query_box.focus_set()

    def handle_search_terminated(self, event):
        """Display the page the model has just made available."""
        # todo: refactor the model such that it is less state sensitive
        results = self.model.get_results()
        self.write_results(results)
        self.status["text"] = ""
        if len(results) == 0:
            self.status["text"] = "No results found for " + self.model.query
        else:
            self.current_page = self.model.last_requested_page
        self.unfreeze_editable()
        self.results_box.xview_moveto(self._FRACTION_LEFT_TEXT)

    def handle_search_error(self, event):
        """Report that the model rejected the query."""
        self.status["text"] = "Error in query " + self.model.query
        self.unfreeze_editable()

    def corpus_selected(self, *args):
        """Callback of the corpus drop-down: load whatever is selected now."""
        new_selection = self.var.get()
        self.load_corpus(new_selection)

    def load_corpus(self, selection):
        """Ask the model to load *selection* unless it is already selected."""
        if self.model.selected_corpus != selection:
            self.status["text"] = "Loading " + selection + "..."
            self.freeze_editable()
            self.model.load_corpus(selection)

    def search(self):
        """Start a fresh search for the text in the query box."""
        self.current_page = 0
        self.clear_results_box()
        self.model.reset_results()
        query = self.query_box.get()
        if len(query.strip()) == 0:
            return
        self.status["text"] = "Searching for " + query
        self.freeze_editable()
        self.model.search(query, self.current_page + 1)

    def _highlight(self, tag, row, markers):
        """Apply *tag* to every (start, end) column range of text row *row*."""
        for start, end in markers:
            self.results_box.tag_add(
                tag, str(row) + "." + str(start), str(row) + "." + str(end)
            )

    def write_results(self, results):
        """Write one line per hit, aligned on the match, and highlight the match.

        Each element of *results* is indexed as ``(sentence, start, end)``.
        """
        self.results_box["state"] = "normal"
        row = 1
        for each in results:
            sent, pos1, pos2 = each[0].strip(), each[1], each[2]
            if len(sent) != 0:
                # Left-pad short contexts so that every match starts in
                # column _char_before of the displayed line.
                sent, pos1, pos2 = self.pad(sent, pos1, pos2)
                sentence = sent[pos1 - self._char_before : pos1 + self._char_after]
                if row != len(results):
                    sentence += "\n"
                self.results_box.insert(str(row) + ".0", sentence)
                word_markers, label_markers = self.words_and_labels(sent, pos1, pos2)
                self._highlight(self._HIGHLIGHT_WORD_TAG, row, word_markers)
                self._highlight(self._HIGHLIGHT_LABEL_TAG, row, label_markers)
                row += 1
        self.results_box["state"] = "disabled"

    def words_and_labels(self, sentence, pos1, pos2):
        """Return the display columns of the words and tags inside a match.

        ``sentence[pos1:pos2]`` is the matched text, a space-separated run of
        ``word/TAG`` tokens.  Columns are relative to the displayed line, in
        which the match starts at ``_char_before``.  The tag is whatever
        follows the *last* slash of a token, so words that contain a slash
        themselves are handled; a token without any slash is treated as a
        word with no tag.

        :return: ``(words, labels)``, two lists of ``(start, end)`` columns.
        """
        search_exp = sentence[pos1:pos2]
        words, labels = [], []
        index = 0
        for token in search_exp.split(" "):
            if token:
                word, slash, label = token.rpartition("/")
                if not slash:
                    word, label = token, ""
                start = self._char_before + index
                words.append((start, start + len(word)))
                if slash:
                    label_start = start + len(word) + 1
                    labels.append((label_start, label_start + len(label)))
            # Skip the token and the single space that followed it.
            index += len(token) + 1
        return words, labels

    def pad(self, sent, hstart, hend):
        """Left-pad *sent* so that the match starts at column ``_char_before``.

        :return: the (possibly padded) sentence and the shifted match offsets.
        """
        if hstart >= self._char_before:
            return sent, hstart, hend
        d = self._char_before - hstart
        sent = " " * d + sent
        return sent, hstart + d, hend + d

    def destroy(self, *e):
        """Cancel the pending poll and close the window; safe to call twice."""
        if self.top is None:
            return
        self.top.after_cancel(self.after)
        self.top.destroy()
        self.top = None

    def clear_all(self):
        """Empty the query box, the model's query and the results box."""
        self.query_box.delete(0, END)
        self.model.reset_query()
        self.clear_results_box()

    def clear_results_box(self):
        """Remove all text from the results box."""
        # The box is kept read-only, so it has to be unlocked for the edit.
        self.results_box["state"] = "normal"
        self.results_box.delete("1.0", END)
        self.results_box["state"] = "disabled"

    def freeze_editable(self):
        """Disable the query box and all buttons."""
        self.query_box["state"] = "disabled"
        self.search_button["state"] = "disabled"
        self.prev["state"] = "disabled"
        self.next["state"] = "disabled"

    def unfreeze_editable(self):
        """Re-enable the query controls and whichever paging buttons apply."""
        self.query_box["state"] = "normal"
        self.search_button["state"] = "normal"
        self.set_paging_button_states()

    def set_paging_button_states(self):
        """Enable Previous/Next according to the current page and the model."""
        on_first_page = self.current_page in (0, 1)
        self.prev["state"] = "disabled" if on_first_page else "normal"
        more = self.model.has_more_pages(self.current_page)
        self.next["state"] = "normal" if more else "disabled"

    def fire_event(self, event):
        """Generate the Tk virtual event *event* on the top-level window."""
        # Firing an event so that rendering of widgets happen in the mainloop thread
        self.top.event_generate(event, when="tail")

    def mainloop(self, *args, **kwargs):
        """Run the Tk main loop, unless running under IDLE."""
        if in_idle():
            return
        self.top.mainloop(*args, **kwargs)
class ConcordanceSearchModel:
    """Loaded corpus, current query and the result pages found so far.

    Loading and searching run on worker threads; each announces its outcome
    by putting an event name on ``queue``.  Result pages are numbered from 1
    and stored in ``results`` at index ``page - 1``.
    """

    def __init__(self, queue):
        """Create an empty model that reports on *queue*."""
        self.queue = queue
        self.CORPORA = _CORPORA
        self.DEFAULT_CORPUS = _DEFAULT
        self.selected_corpus = None
        self.reset_query()
        self.reset_results()
        self.result_count = None
        self.last_sent_searched = 0

    def non_default_corpora(self):
        """Return the names of all corpora except the default one, sorted."""
        return sorted(name for name in self.CORPORA if name != self.DEFAULT_CORPUS)

    def load_corpus(self, name):
        """Select corpus *name* and read it on a worker thread."""
        self.selected_corpus = name
        self.tagged_sents = []
        self.LoadCorpus(name, self).start()

    def search(self, query, page):
        """Search for *query* on a worker thread, producing result page *page*."""
        self.query = query
        self.last_requested_page = page
        self.SearchCorpus(self, page, self.result_count).start()

    def next(self, page):
        """Make *page* the requested page, searching further if it is not cached."""
        self.last_requested_page = page
        if len(self.results) < page:
            self.search(self.query, page)
        else:
            self.queue.put(SEARCH_TERMINATED_EVENT)

    def prev(self, page):
        """Make the already cached *page* the requested page."""
        self.last_requested_page = page
        self.queue.put(SEARCH_TERMINATED_EVENT)

    def reset_results(self):
        """Forget all result pages and restart scanning at the first sentence."""
        self.last_sent_searched = 0
        self.results = []
        self.last_page = None

    def reset_query(self):
        """Forget the current query."""
        self.query = None

    def set_results(self, page, resultset):
        """Store *resultset* as result page *page*."""
        self.results.insert(page - 1, resultset)

    def get_results(self):
        """Return the hits of the most recently requested page."""
        return self.results[self.last_requested_page - 1]

    def has_more_pages(self, page):
        """Return True if hits may exist beyond *page*."""
        if not self.results or not self.results[0]:
            return False
        if self.last_page is None:
            # The end of the corpus has not been reached by any search yet.
            return True
        return page < self.last_page

    class LoadCorpus(threading.Thread):
        """Worker that reads a corpus into "word/TAG word/TAG ..." strings."""

        def __init__(self, name, model):
            threading.Thread.__init__(self)
            self.model, self.name = model, name

        def run(self):
            """Read the corpus and post the outcome on the model's queue."""
            try:
                ts = self.model.CORPORA[self.name]()
                self.model.tagged_sents = [
                    " ".join(w + "/" + t for (w, t) in sent) for sent in ts
                ]
                self.model.queue.put(CORPUS_LOADED_EVENT)
            except Exception as e:
                # Any failure (e.g. in the corpus reader) is reported to the
                # view instead of killing the thread silently.
                print(e)
                self.model.queue.put(ERROR_LOADING_CORPUS_EVENT)

    class SearchCorpus(threading.Thread):
        """Worker that collects one page of at most *count* matching sentences."""

        def __init__(self, model, page, count):
            self.model, self.count, self.page = model, count, page
            threading.Thread.__init__(self)

        def run(self):
            """Scan on from ``last_sent_searched`` and store one result page.

            Puts ``SEARCH_ERROR_EVENT`` on the queue if the query is not a
            valid regular expression, ``SEARCH_TERMINATED_EVENT`` otherwise.
            """
            model = self.model
            try:
                pattern = re.compile(self.processed_query())
            except re.error:
                model.reset_results()
                model.queue.put(SEARCH_ERROR_EVENT)
                return
            hits = []
            scanned = 0
            reached_end = True
            for sent in model.tagged_sents[model.last_sent_searched :]:
                m = pattern.search(sent)
                if m:
                    if len(hits) == self.count:
                        # One match beyond a full page proves there is a next
                        # page.  This sentence is not counted as scanned, so
                        # the next search resumes exactly here and neither
                        # skips nor repeats a hit.
                        reached_end = False
                        break
                    hits.append((sent, m.start(), m.end()))
                scanned += 1
            model.last_sent_searched += scanned
            if reached_end:
                model.last_page = self.page
            model.set_results(self.page, hits)
            model.queue.put(SEARCH_TERMINATED_EVENT)

        def processed_query(self):
            """Translate the query into a regex over "word/TAG" strings.

            An all-uppercase term matches a tag, a term containing a slash is
            taken as ``word/TAG`` and any other term matches a word.  In every
            term ``.`` stands for one character of a word or tag.  The term is
            classified before ``.`` is expanded, because the expansion itself
            contains a slash.
            """
            new = []
            for term in self.model.query.split():
                expanded = term.replace(".", "[^/ ]")
                if re.match("[A-Z]+$", term):
                    new.append(BOUNDARY + WORD_OR_TAG + "/" + expanded + BOUNDARY)
                elif "/" in term:
                    new.append(BOUNDARY + expanded + BOUNDARY)
                else:
                    new.append(BOUNDARY + expanded + "/" + WORD_OR_TAG + BOUNDARY)
            return " ".join(new)
def app():
    """Open the concordance search window and run its event loop."""
    view = ConcordanceSearchView()
    view.mainloop()


if __name__ == "__main__":
    app()

__all__ = ["app"]

View File

@@ -0,0 +1,163 @@
# Finding (and Replacing) Nemo, Version 1.1, Aristide Grange 2006/06/06
# https://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/496783
"""
Finding (and Replacing) Nemo
Instant Regular Expressions
Created by Aristide Grange
"""
import itertools
import re
from tkinter import SEL_FIRST, SEL_LAST, Frame, Label, PhotoImage, Scrollbar, Text, Tk
# Title given to the main window.
windowTitle = "Finding (and Replacing) Nemo"
# Regular expression initially shown in the "find" field.
initialFind = r"n(.*?)e(.*?)m(.*?)o"
# Replacement template initially shown in the "replace" field.
initialRepl = r"M\1A\2K\3I"
initialText = """\
Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.
Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat.
Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur.
Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
"""
images = {
"FIND": "R0lGODlhMAAiAPcAMf/////37//35//n1v97Off///f/9/f37/fexvfOvfeEQvd7QvdrQvdrKfdaKfdSMfdSIe/v9+/v7+/v5+/n3u/e1u/Wxu/Gre+1lO+tnO+thO+Ua+97Y+97Oe97Me9rOe9rMe9jOe9jMe9jIe9aMefe5+fe3ufezuece+eEWudzQudaIedSIedKMedKIedCKedCId7e1t7Wzt7Oxt7Gvd69vd69rd61pd6ljN6UjN6Ue96EY95zY95rUt5rQt5jMd5SId5KIdbn59be3tbGztbGvda1rdaEa9Z7a9Z7WtZzQtZzOdZzMdZjMdZaQtZSOdZSMdZKMdZCKdZCGNY5Ic7W1s7Oxs7Gtc69xs69tc69rc6tpc6llM6clM6cjM6Ue86EY85zWs5rSs5SKc5KKc5KGMa1tcatrcalvcalnMaUpcZ7c8ZzMcZrUsZrOcZrMcZaQsZSOcZSMcZKMcZCKcZCGMYxIcYxGL3Gxr21tb21rb2lpb2crb2cjL2UnL2UlL2UhL2Ec717Wr17Ur1zWr1rMb1jUr1KMb1KIb1CIb0xGLWlrbWlpbWcnLWEe7V7c7VzY7VzUrVSKbVKMbVCMbVCIbU5KbUxIbUxEK2lta2lpa2clK2UjK2MnK2MlK2Ea617e61za61rY61rMa1jSq1aUq1aSq1SQq1KKa0xEKWlnKWcnKWUnKWUhKWMjKWEa6Vza6VrWqVjMaVaUqVaKaVSMaVCMaU5KaUxIaUxGJyclJyMe5yElJyEhJx7e5x7c5xrOZxaQpxSOZxKQpw5IZSMhJSEjJR7c5Rre5RrY5RrUpRSQpRSKZRCOZRCKZQxKZQxIYyEhIx7hIxza4xzY4xrc4xjUoxaa4xaUoxSSoxKQoxCMYw5GIR7c4Rzc4Rre4RjY4RjWoRaa4RSWoRSUoRSMYRKQoRCOYQ5KYQxIXtra3taY3taSntKOXtCMXtCKXNCMXM5MXMxIWtSUmtKSmtKQmtCOWs5MWs5KWs5IWNCKWMxIVIxKUIQCDkhGAAAACH+AS4ALAAAAAAwACIAAAj/AAEIHEiwoMGDCBMqXMiwoUOHMqxIeEiRoZVp7cpZ29WrF4WKIAd208dGAQEVbiTVChUjZMU9+pYQmPmBZpxgvVw+nDdKwQICNVcIXQEkTgKdDdUJ+/nggVAXK1xI3TEA6UIr2uJ8iBqka1cXXTlkqGoVYRZ7iLyqBSs0iiEtZQVKiDGxBI1u3NR6lUpGDKg8MSgEQCphU7Z22vhg0dILXRCpYLuSCcYJT4wqXASBQaBzU7klHxC127OHD7ZDJFpERqRt0x5OnwQpmZmCLEhrbgg4WIHO1RY+nbQ9WRGEDJlmnXwJ+9FBgXMCIzYMVijBBgYMFxIMqJBMSc0Ht7qh/+Gjpte2rnYsYeNlasWIBgQ6yCewIoPCCp/cyP/wgUGbXVu0QcADZNBDnh98gHMLGXYQUw02w61QU3wdbNWDbQVVIIhMMwFF1DaZiPLBAy7E04kafrjSizaK3LFNNc0AAYRQDsAHHQlJ2IDQJ2zE1+EKDjiAijShkECCC8Qgw4cr7ZgyzC2WaHPNLWWoNeNWPiRAw0QFWQFMhz8C+QQ20yAiVSrY+MGOJCsccsst2GCzoHFxxEGGC+8hgs0MB2kyCpgzrUDCbs1Es41UdtATHFFkWELMOtsoQsYcgvRRQw5RSDgGOjZMR1AvPQIq6KCo9AKOJWDd48owQlHR4DXEKP9iyRrK+DNNBTu4RwIPFeTAGUG7hAomkA84gEg1m6ADljy9PBKGGJY4ig0xlsTBRSn98FOFDUC8pwQOPkgHbCGAzhTkA850s0c7j6Hjix9+gBIrMXLeAccWXUCyiRBcBEECdEJ98KtAqtBCYQc/OvDENnl4gYpUxISCIjjzylkGGV9okYUVNogRhAOBuuAEhjG08wOgDYzAgA5bCjIoCe5uwUk80RKTTSppPREGGGCIISOQ9AXBg6c
C6WIywvCpoMHAocRBwhP4bHLFLujYkV42xNxBRhAyGrc113EgYtRBerDDDHMoDCyQEL5sE083EkgwQyBhxGFHMM206DUixGxmE0wssbQjCQ4JCaFKFwgQTVAVVhQUwAVPIFJKrHfYYRwi6OCDzzuIJIFhXAD0EccPsYRiSyqKSDpFcWSMIcZRoBMkQyA2BGZDIKSYcggih8TRRg4VxM5QABVYYLxgwiev/PLMCxQQADs=",
"find": "R0lGODlhMAAiAPQAMf////f39+/v7+fn597e3tbW1s7OzsbGxr29vbW1ta2traWlpZycnJSUlIyMjISEhHt7e3Nzc2tra2NjY1paWlJSUkpKSkJCQjk5OSkpKRgYGAAAAAAAAAAAAAAAAAAAACH+AS4ALAAAAAAwACIAAAX/ICCOZGmeaKquY2AGLiuvMCAUBuHWc48Kh0iFInEYCb4kSQCxPBiMxkMigRQEgJiSFVBYHNGG0RiZOHjblWAiiY4fkDhEYoBp06dAWfyAQyKAgAwDaHgnB0RwgYASgQ0IhDuGJDAIFhMRVFSLEX8QCJJ4AQM5AgQHTZqqjBAOCQQEkWkCDRMUFQsICQ4Vm5maEwwHOAsPDTpKMAsUDlO4CssTcb+2DAp8YGCyNFoCEsZwFQ3QDRTTVBRS0g1QbgsCd5QAAwgIBwYFAwStzQ8UEdCKVchky0yVBw7YuXkAKt4IAg74vXHVagqFBRgXSCAyYWAVCH0SNhDTitCJfSL5/4RbAPKPhQYYjVCYYAvCP0BxEDaD8CheAAHNwqh8MMGPSwgLeJWhwHSjqkYI+xg4MMCEgQjtRvZ7UAYCpghMF7CxONOWJkYR+rCpY4JlVpVxKDwYWEactKW9mhYRtqCTgwgWEMArERSK1j5q//6T8KXonFsShpiJkAECgQYVjykooCVA0JGHEWNiYCHThTFeb3UkoiCCBgwGEKQ1kuAJlhFwhA71h5SukwUM5qqeCSGBgicEWkfNiWSERtBad4JNIBaQBaQah1ToyGZBAnsIuIJs1qnqiAIVjIE2gnAB1T5x0icgzXT79ipgMOOEH6HBbREBMJCeGEY08IoLAkzB1YYFwjxwSUGSNULQJnNUwRYlCcyEkALIxECAP9cNMMABYpRhy3ZsSLDaR70oUAiABGCkAxowCGCAAfDYIQACXoElGRsdXWDBdg2Y90IWktDYGYAB9PWHP0PMdFZaF07SQgAFNDAMAQg0QA1UC8xoZQl22JGFPgWkOUCOL1pZQyhjxinnnCWEAAA7",
"REPL": "R0lGODlhMAAjAPcAMf/////3//+lOf+UKf+MEPf///f39/f35/fv7/ecQvecOfecKfeUIfeUGPeUEPeUCPeMAO/37+/v9+/v3u/n3u/n1u+9jO+9c++1hO+ta++tY++tWu+tUu+tSu+lUu+lQu+lMe+UMe+UKe+UGO+UEO+UAO+MCOfv5+fvxufn7+fn5+fnzue9lOe9c+e1jOe1e+e1c+e1a+etWuetUuelQuecOeeUUueUCN7e597e3t7e1t7ezt7evd7Wzt7Oxt7Ovd7Otd7Opd7OnN7Gtd7Gpd69lN61hN6ta96lStbextberdbW3tbWztbWxtbOvdbOrda1hNalUtaECM7W1s7Ozs7Oxs7Otc7Gxs7Gvc69tc69rc69pc61jM6lc8bWlMbOvcbGxsbGpca9tca9pca1nMaMAL3OhL3Gtb21vb21tb2tpb2tnL2tlLW9tbW9pbW9e7W1pbWtjLWcKa21nK2tra2tnK2tlK2lpa2llK2ljK2le6WlnKWljKWUe6WUc6WUY5y1QpyclJycjJychJyUc5yMY5StY5SUe5SMhJSMe5SMc5SMWpSEa5SESoyUe4yMhIyEY4SlKYScWoSMe4SEe4SEa4R7c4R7Y3uMY3uEe3t7e3t7c3tza3tzY3trKXtjIXOcAHOUMXOEY3Nzc3NzWnNrSmulCGuUMWuMGGtzWmtrY2taMWtaGGOUOWOMAGNzUmNjWmNjSmNaUmNaQmNaOWNaIWNSCFqcAFpjUlpSMVpSIVpSEFpKKVKMAFJSUlJSSlJSMVJKMVJKGFJKAFI5CEqUAEqEAEpzQkpKIUpCQkpCGEpCAEo5EEoxAEJjOUJCOUJCAEI5IUIxADl7ADlaITlCOTkxMTkxKTkxEDkhADFzADFrGDE5OTExADEpEClrCCkxKSkpKSkpISkpACkhCCkhACkYACFzACFrACEhCCEYGBhjEBhjABghABgYCBgYABgQEBgQABAQABAIAAhjAAhSAAhKAAgIEAgICABaAABCAAAhAAAQAAAIAAAAAAAAACH+AS4ALAAAAAAwACMAAAj/AAEIHEiwoMGDCBMqXMiwocOHAA4cgEixIIIJO3JMmAjADIqKFU/8MHIkg5EgYXx4iaTkI0iHE6wE2TCggYILQayEAgXIy8uGCKz8sDCAQAMRG3iEcXULlJkJPwli3OFjh9UdYYLE6NBhA04UXHoVA2XoTZgfPKBWlOBDphAWOdfMcfMDLloeO3hIMjbWVCQ5Fn6E2UFxgpsgFjYIEBADrZU6luqEEfqjTqpt54z1uuWqTIcgWAk7PECGzIUQDRosDmxlUrVJkwQJkqVuX71v06YZcyUlROAdbnLAJKPFyAYFAhoMwFlnEh0rWkpz8raPHm7dqKKc/KFFkBUrVn1M/ziBcEIeLUEQI8/AYk0i9Be4sqjsrN66c9/OnbobhpR3HkIUoZ0WVnBE0AGLFKKFD0HAFUQe77HQgQI1hRBDEHMcY0899bBzihZuCPILJD8EccEGGzwAQhFaUHHQH82sUkgeNHISDBk8WCCCcsqFUEQWmOyzjz3sUGNNOO5Y48YOEgowAAQhnBScQV00k82V47jzjy9CXZBcjziFoco//4CDiSOyhPMPLkJZkEBqJmRQxA9uZGEQD8Ncmc044/zzDF2IZQBCCDYE8QMZz/iiCSx0neHGI7BIhhhNn+1gxRpokEcQAp7seWU7/PwTyxqG/iCEEVzQmUombnDRxRExzP9nBR2PCKLFD3UJwcMPa/SRqUGNWJmNOVn+M44ukMRB4KGcWDNLVhuUMEIJAlzwA3DJBHMJIXm4sQYhqyxCRQQGLSIsn1qac2UzysQSyzX/hLMGD0F0IMCODYAQBA9W/PKPOcRiw0wzwxTiokF9dLMnuv/Mo+fCZF7jBr0xbDDCACWEYKgb1vzjDp/jZNOMLX0IZxAKq2TZTjtaOjwOsXyG+s8sZJTIQsUdIGHoJPf8w487QI/TDSt
5mGwQFZxc406o8HiDJchk/ltLHpSlJwSvz5DpTjvmuGNOM57koelBOaAhiCaaPBLL0wwbm003peRBnBZqJMJL1ECz/HXYYx/NdAIOOVCxQyLorswymU93o0wuwfAiTDNR/xz0MLXU0XdCE+UwSTRZAq2lsSATu+4wkGvt+TjNzPLrQyegAUku2Hij5cd8LhxyM8QIg4w18HgcdC6BTBFSDmfQqsovttveDcG7lFLHI75cE841sARCxeWsnxC4G9HADPK6ywzDCRqBo0EHHWhMgT1IJzziNci1N7PMKnSYfML96/90AiJKey/0KtbLX1QK0rrNnQ541xugQ7SHhkXBghN0SKACWRc4KlAhBwKcIOYymJCAAAA7",
"repl": "R0lGODlhMAAjAPQAMf////f39+/v7+fn597e3tbW1s7OzsbGxr29vbW1ta2traWlpZycnJSUlIyMjISEhHt7e3Nzc2tra2NjY1paWlJSUkpKSkJCQjk5OTExMSkpKSEhIRgYGBAQEAgICAAAACH+AS4ALAAAAAAwACMAAAX/ICCOZGmeaKqubOu+gCDANBkIQ1EMQhAghFptYEAkEgjEwXBo7ISvweGgWCwUysPjwTgEoCafTySYIhYMxgLBjEQgCULvCw0QdAZdoVhUIJUFChISEAxYeQM1N1OMTAp+UwZ5eA4TEhFbDWYFdC4ECVMJjwl5BwsQa0umEhUVlhESDgqlBp0rAn5nVpBMDxeZDRQbHBgWFBSWDgtLBnFjKwRYCI9VqQsPs0YKEcMXFq0UEalFDWx4BAO2IwPjppAKDkrTWKYUGd7fEJJFEZpM00cOzCgh4EE8SaoWxKNixQooBRMyZMBwAYIRBhUgLDGS4MoBJeoANMhAgQsaCRZm/5lqaCUJhA4cNHjDoKEDBlJUHqkBlYBTiQUZNGjYMMxDhY3VWk6R4MEDBoMUak5AqoYBqANIBo4wcGGDUKIeLlzVZmWJggsVIkwAZaQSA3kdZzlKkIiEAAlDvW5oOkEBs488JTw44oeUIwdvVTFTUK7uiAAPgubt8GFDhQepqETAQCFU1UMGzlqAgFhUsAcCS0AO6lUDhw8xNRSbENGDhgWSHjWUe6ACbKITizmopZoBa6KvOwj9uuHDhwxyj3xekgDDhw5EvWKo0IB4iQLCOCC/njc7ZQ8UeGvza+ABZZgcxJNc4FO1gc0cOsCUrHevc8tdIMTIAhc4F198G2Qwwd8CBIQUAwEINABBBJUwR9R5wElgVRLwWODBBx4cGB8GEzDQIAo33CGJA8gh+JoH/clUgQU0YvDhdfmJdwEFC6Sjgg8yEPAABsPkh2F22cl2AQbn6QdTghTQ5eAJAQyQAAQV0MSBB9gRVZ4GE1mw5JZOAmiAVi1UWcAZDrDyZXYTeaOhA/bIVuIBPtKQ4h7ViYekUPdcEAEbzTzCRp5CADmAAwj+ORGPBcgwAAHo9ABGCYtm0ChwFHShlRiXhmHlkAcCiOeUodqQw5W0oXLAiamy4MOkjOyAaqxUymApDCEAADs=",
}
# Background colours used, alternately, to mark successive matches; each
# colour string also serves as the name of the corresponding text tag.
colors = ["#FF7B39", "#80F121"]
# Foreground colours of the "emph" tags, paired by index with ``colors``.
emphColors = ["#DAFC33", "#F42548"]
# Options for the small Text widget holding a zone's regexp / template.
fieldParams = {
    "height": 3,
    "width": 70,
    "font": ("monaco", 14),
    "highlightthickness": 0,
    "borderwidth": 0,
    "background": "white",
}
# Options for the large Text widget holding a zone's text.
textParams = {
    "bg": "#F7E0D4",
    "fg": "#2321F1",
    "highlightthickness": 0,
    "width": 1,
    "height": 10,
    "font": ("verdana", 16),
    "wrap": "word",
}
class Zone:
    """
    One half of the window: an icon plus a small input field on top of a
    larger, scrollable text area.  Subclasses supply ``substitute()``,
    which ``refresh()`` calls to recompute the text area's highlighting.
    """

    def __init__(self, image, initialField, initialText):
        # Upper row: status icon and the regexp / template field.
        upper = Frame(root)
        upper.configure(background="white")
        self.image = PhotoImage(format="gif", data=images[image.upper()])
        self.imageDimmed = PhotoImage(format="gif", data=images[image])
        self.img = Label(upper)
        self.img.configure(borderwidth=0)
        self.img.pack(side="left")
        self.fld = Text(upper, **fieldParams)
        self.initScrollText(upper, self.fld, initialField)

        # Lower row: the text the expression is applied to.
        lower = Frame(root)
        self.txt = Text(lower, **textParams)
        self.initScrollText(lower, self.txt, initialText)
        for index in (0, 1):
            tag = colors[index]
            self.txt.tag_config(tag, background=tag)
            self.txt.tag_config("emph" + tag, foreground=emphColors[index])

    def initScrollText(self, frm, txt, contents):
        """Attach a vertical scrollbar to ``txt``, fill it with ``contents``
        and pack its frame, followed by a thin ridge separator."""
        scrollbar = Scrollbar(frm)
        scrollbar.config(command=txt.yview)
        scrollbar.pack(side="right", fill="y")
        txt.pack(side="left", expand=True, fill="x")
        txt.config(yscrollcommand=scrollbar.set)
        txt.insert("1.0", contents)
        frm.pack(fill="x")
        separator = Frame(height=2, bd=1, relief="ridge")
        separator.pack(fill="x")

    def refresh(self):
        """Redo the substitution, showing the bright icon on success and
        the dimmed one when a regular-expression error is raised."""
        self.colorCycle = itertools.cycle(colors)
        try:
            self.substitute()
        except re.error:
            self.img.config(image=self.imageDimmed)
        else:
            self.img.config(image=self.image)
class FindZone(Zone):
    """
    The "find" half of the window: compiles the regular expression typed
    in the field and highlights its matches in the text area.  The part
    of the expression currently selected in the field, if any, is wrapped
    in a group named ``emph`` so that what it matched can be emphasised.
    """

    def addTags(self, m):
        """Tag the span of match ``m`` with the next colour of the cycle
        and, when available, the span of its ``emph`` group."""
        color = next(self.colorCycle)
        self.txt.tag_add(color, "1.0+%sc" % m.start(), "1.0+%sc" % m.end())
        try:
            self.txt.tag_add(
                "emph" + color, "1.0+%sc" % m.start("emph"), "1.0+%sc" % m.end("emph")
            )
        except Exception:
            # Best effort: the pattern has no "emph" group when nothing is
            # selected in the field.  ``Exception`` (rather than a bare
            # ``except``) lets KeyboardInterrupt/SystemExit propagate.
            pass

    def substitute(self, *args):
        """
        Recompile the expression and re-highlight every match.

        :raise re.error: if the field does not hold a valid regular
            expression; ``self.rex`` is then left as the empty pattern.
        """
        for color in colors:
            self.txt.tag_remove(color, "1.0", "end")
            self.txt.tag_remove("emph" + color, "1.0", "end")
        self.rex = re.compile("")  # default value in case of malformed regexp
        self.rex = re.compile(self.fld.get("1.0", "end")[:-1], re.MULTILINE)
        try:
            # First make sure the selection is a valid expression on its own,
            # then rebuild the whole expression around a named group.
            re.compile("(?P<emph>%s)" % self.fld.get(SEL_FIRST, SEL_LAST))
            self.rexSel = re.compile(
                "%s(?P<emph>%s)%s"
                % (
                    self.fld.get("1.0", SEL_FIRST),
                    self.fld.get(SEL_FIRST, SEL_LAST),
                    self.fld.get(SEL_LAST, "end")[:-1],
                ),
                re.MULTILINE,
            )
        except Exception:
            # No selection, or the selection does not split the expression
            # into valid pieces: fall back to the plain expression.  Kept
            # deliberately broad, but no longer a bare ``except``.
            self.rexSel = self.rex
        # ``sub`` is used only to invoke addTags once per match.
        self.rexSel.sub(self.addTags, self.txt.get("1.0", "end"))
class ReplaceZone(Zone):
    """
    The "replace" half of the window: shows the find zone's text with
    every match replaced according to the template typed in the field.
    """

    def addTags(self, m):
        """Replace the text of match ``m`` in this zone by its substitution,
        tagged with the next colour of the cycle."""
        replacement = sz.rex.sub(self.repl, m.group())
        # ``self.diff`` is the length change caused by earlier replacements.
        first = m.start() + self.diff
        last = m.end() + self.diff
        self.txt.delete("1.0+%sc" % first, "1.0+%sc" % last)
        self.txt.insert("1.0+%sc" % first, replacement, next(self.colorCycle))
        self.diff += len(replacement) - (m.end() - m.start())

    def substitute(self):
        """Copy the find zone's text here, then replace each match."""
        self.txt.delete("1.0", "end")
        self.txt.insert("1.0", sz.txt.get("1.0", "end")[:-1])
        self.diff = 0
        template = self.fld.get("1.0", "end")[:-1]
        # Rewrite numeric back-references (\1) into the \g<1> form.
        self.repl = rex0.sub(r"\\g<\1>", template)
        source_text = sz.txt.get("1.0", "end")[:-1]
        sz.rex.sub(self.addTags, source_text)
def launchRefresh(_):
    """Schedule a refresh of the find zone, then of the replace zone, for
    when the event loop is next idle.  The event argument is ignored."""
    for zone in (sz, rz):
        zone.fld.after_idle(zone.refresh)
def app():
    """Build the main window with its find and replace zones and run the
    Tk main loop.  The window, both zones and the back-reference pattern
    are stored in module globals, which the zone classes read."""
    global root, sz, rz, rex0
    root = Tk()
    root.resizable(height=False, width=True)
    root.title(windowTitle)
    root.minsize(width=250, height=0)

    sz = FindZone("find", initialFind, initialText)
    # Mouse activity in the find field can change the selection.
    for sequence in ("<Button-1>", "<ButtonRelease-1>", "<B1-Motion>"):
        sz.fld.bind(sequence, launchRefresh)
    sz.rexSel = re.compile("")

    rz = ReplaceZone("repl", initialRepl, "")
    rex0 = re.compile(r"(?<!\\)\\([0-9]+)")

    root.bind_all("<Key>", launchRefresh)
    launchRefresh(None)
    root.mainloop()
# Start the application when this file is run as a script.
if __name__ == "__main__":
    app()
# ``app`` is the only name exported by a star-import of this module.
__all__ = ["app"]

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,937 @@
# Natural Language Toolkit: Shift-Reduce Parser Application
#
# Copyright (C) 2001-2025 NLTK Project
# Author: Edward Loper <edloper@gmail.com>
# URL: <https://www.nltk.org/>
# For license information, see LICENSE.TXT
"""
A graphical tool for exploring the shift-reduce parser.
The shift-reduce parser maintains a stack, which records the structure
of the portion of the text that has been parsed. The stack is
initially empty. Its contents are shown on the left side of the main
canvas.
On the right side of the main canvas is the remaining text. This is
the portion of the text which has not yet been considered by the
parser.
The parser builds up a tree structure for the text using two
operations:
- "shift" moves the first token from the remaining text to the top
of the stack. In the demo, the top of the stack is its right-hand
side.
- "reduce" uses a grammar production to combine the rightmost stack
elements into a single tree token.
You can control the parser's operation by using the "shift" and
"reduce" buttons; or you can use the "step" button to let the parser
automatically decide which operation to apply. The parser uses the
following rules to decide which operation to apply:
- Only shift if no reductions are available.
- If multiple reductions are available, then apply the reduction
whose CFG production is listed earliest in the grammar.
The "reduce" button applies the reduction whose CFG production is
listed earliest in the grammar. There are two ways to manually choose
which reduction to apply:
- Click on a CFG production from the list of available reductions,
on the left side of the main window. The reduction based on that
production will be applied to the top of the stack.
- Click on one of the stack elements. A popup window will appear,
containing all available reductions. Select one, and it will be
applied to the top of the stack.
Note that reductions can only be applied to the top of the stack.
Keyboard Shortcuts::
[Space]\t Perform the next shift or reduce operation
[s]\t Perform a shift operation
[r]\t Perform a reduction operation
[Ctrl-z]\t Undo most recent operation
[Delete]\t Reset the parser
[g]\t Show/hide available production list
[Ctrl-a]\t Toggle animations
[h]\t Help
[Ctrl-p]\t Print
[q]\t Quit
"""
from tkinter import Button, Frame, IntVar, Label, Listbox, Menu, Scrollbar, Tk
from tkinter.font import Font
from nltk.draw import CFGEditor, TreeSegmentWidget, tree_to_treesegment
from nltk.draw.util import CanvasFrame, EntryDialog, ShowText, TextWidget
from nltk.parse import SteppingShiftReduceParser
from nltk.tree import Tree
from nltk.util import in_idle
"""
Possible future improvements:
- button/window to change and/or select text. Just pop up a window
with an entry, and let them modify the text; and then retokenize
it? Maybe give a warning if it contains tokens whose types are
not in the grammar.
- button/window to change and/or select grammar. Select from
several alternative grammars? Or actually change the grammar? If
the latter, then I'd want to define nltk.draw.cfg, which would be
responsible for that.
"""
class ShiftReduceApp:
"""
A graphical tool for exploring the shift-reduce parser. The tool
displays the parser's stack and the remaining text, and allows the
user to control the parser's operation. In particular, the user
can shift tokens onto the stack, and can perform reductions on the
top elements of the stack. A "step" button simply steps through
the parsing process, performing the operations that
``nltk.parse.ShiftReduceParser`` would use.
"""
def __init__(self, grammar, sent, trace=0):
    """
    Create the application window and show the initial parser state.

    :param grammar: The grammar handed to the stepping parser.
    :param sent: The text to parse; it is passed to the parser's
        ``initialize`` on every reset.
    :param trace: Trace level handed to the stepping parser.
    """
    self._sent = sent
    self._parser = SteppingShiftReduceParser(grammar, trace)

    # Set up the main window.
    top = self._top = Tk()
    top.title("Shift Reduce Parser Application")

    # Animations.  animating_lock is a lock to prevent the demo
    # from performing new operations while it's animating.
    self._animating_lock = 0
    self._animate = IntVar(top)
    self._animate.set(10)  # = medium

    # The user can hide the grammar.
    self._show_grammar = IntVar(top)
    self._show_grammar.set(1)

    # Fonts and key bindings come first; then the frames, in this order.
    self._init_fonts(top)
    self._init_bindings()
    for build_frame in (
        self._init_menubar,
        self._init_buttons,
        self._init_feedback,
        self._init_grammar,
        self._init_canvas,
    ):
        build_frame(top)

    # A popup menu for reducing.
    self._reduce_menu = Menu(self._canvas, tearoff=0)

    # Reset the demo, and set the feedback frame to empty.
    self.reset()
    self._lastoper1["text"] = ""
#########################################
## Initialization Helpers
#########################################
def _init_fonts(self, root):
    """Create the fonts used by the application and the Tk variable that
    holds the current font size."""
    # See: <http://www.astro.washington.edu/owen/ROTKFolklore.html>
    self._sysfont = Font(font=Button()["font"])
    root.option_add("*Font", self._sysfont)

    # What's our font size (default=same as sysfont)
    self._size = IntVar(root)
    self._size.set(self._sysfont.cget("size"))
    initial_size = self._size.get()

    self._boldfont = Font(family="helvetica", weight="bold", size=initial_size)
    self._font = Font(family="helvetica", size=initial_size)
def _init_grammar(self, parent):
    """
    Build the "Available Reductions" panel: a label, a listbox holding
    one entry per grammar production, and a vertical scrollbar.

    :param parent: The widget the panel is packed into.
    """
    # Grammar view.
    self._prodframe = Frame(parent)
    self._prodframe.pack(fill="both", side="left", padx=2)
    self._prodlist_label = Label(
        self._prodframe, font=self._boldfont, text="Available Reductions"
    )
    self._prodlist_label.pack()
    self._prodlist = Listbox(
        self._prodframe,
        selectmode="single",
        relief="groove",
        background="white",
        foreground="#909090",
        font=self._font,
        selectforeground="#004040",
        selectbackground="#c0f0c0",
    )
    self._prodlist.pack(side="right", fill="both", expand=1)

    self._productions = list(self._parser.grammar().productions())
    for production in self._productions:
        self._prodlist.insert("end", (" %s" % production))
    # Show at most 25 rows at a time.
    self._prodlist.config(height=min(len(self._productions), 25))

    # The scrollbar is always added, however few productions there are
    # (the former ``if 1:`` guard never skipped it).
    listscroll = Scrollbar(self._prodframe, orient="vertical")
    self._prodlist.config(yscrollcommand=listscroll.set)
    listscroll.config(command=self._prodlist.yview)
    listscroll.pack(side="left", fill="y")

    # If they select a production, apply it.
    self._prodlist.bind("<<ListboxSelect>>", self._prodlist_select)

    # When they hover over a production, highlight it.
    self._hover = -1
    self._prodlist.bind("<Motion>", self._highlight_hover)
    self._prodlist.bind("<Leave>", self._clear_hover)
def _init_bindings(self):
    """Bind the keyboard shortcuts on the top-level window."""
    bind = self._top.bind

    # (handler, key sequences), in the order the bindings are installed.
    shortcuts = (
        # Quit
        (self.destroy, ("<Control-q>", "<Control-x>", "<Alt-q>", "<Alt-x>")),
        # Ops (step, shift, reduce, undo)
        (self.step, ("<space>",)),
        (self.shift, ("<s>", "<Alt-s>", "<Control-s>")),
        (self.reduce, ("<r>", "<Alt-r>", "<Control-r>")),
        (self.reset, ("<Delete>",)),
        (
            self.undo,
            ("<u>", "<Alt-u>", "<Control-u>", "<Control-z>", "<BackSpace>"),
        ),
        # Misc
        (self.postscript, ("<Control-p>",)),
        (self.help, ("<Control-h>", "<F1>")),
        (self.edit_grammar, ("<Control-g>",)),
        (self.edit_sentence, ("<Control-t>",)),
    )
    for handler, sequences in shortcuts:
        for sequence in sequences:
            bind(sequence, handler)

    # Animation speed control
    for sequence, value in (("-", 20), ("=", 10), ("+", 4)):
        bind(sequence, lambda e, a=self._animate, value=value: a.set(value))
def _init_buttons(self, parent):
    """Create the row of Step / Shift / Reduce / Undo buttons at the
    bottom of ``parent``."""
    # Set up the frames.
    buttonframe = Frame(parent)
    self._buttonframe = buttonframe
    buttonframe.pack(fill="none", side="bottom")

    # Per-button options; only "Step" has no underlined letter.
    button_options = (
        {"text": "Step", "background": "#90c0d0", "command": self.step},
        {
            "text": "Shift",
            "underline": 0,
            "background": "#90f090",
            "command": self.shift,
        },
        {
            "text": "Reduce",
            "underline": 0,
            "background": "#90f090",
            "command": self.reduce,
        },
        {
            "text": "Undo",
            "underline": 0,
            "background": "#f0a0a0",
            "command": self.undo,
        },
    )
    for options in button_options:
        Button(buttonframe, foreground="black", **options).pack(side="left")
def _init_menubar(self, parent):
    """Create the File / Edit / Apply / View / Animate / Help menus and
    install the menu bar on ``parent``."""
    menubar = Menu(parent)

    def fill(menu, entries):
        # Add command entries in order; ``None`` stands for a separator.
        for options in entries:
            if options is None:
                menu.add_separator()
            else:
                menu.add_command(**options)

    filemenu = Menu(menubar, tearoff=0)
    fill(
        filemenu,
        (
            {
                "label": "Reset Parser",
                "underline": 0,
                "command": self.reset,
                "accelerator": "Del",
            },
            {
                "label": "Print to Postscript",
                "underline": 0,
                "command": self.postscript,
                "accelerator": "Ctrl-p",
            },
            {
                "label": "Exit",
                "underline": 1,
                "command": self.destroy,
                "accelerator": "Ctrl-x",
            },
        ),
    )
    menubar.add_cascade(label="File", underline=0, menu=filemenu)

    editmenu = Menu(menubar, tearoff=0)
    fill(
        editmenu,
        (
            {
                "label": "Edit Grammar",
                "underline": 5,
                "command": self.edit_grammar,
                "accelerator": "Ctrl-g",
            },
            {
                "label": "Edit Text",
                "underline": 5,
                "command": self.edit_sentence,
                "accelerator": "Ctrl-t",
            },
        ),
    )
    menubar.add_cascade(label="Edit", underline=0, menu=editmenu)

    rulemenu = Menu(menubar, tearoff=0)
    fill(
        rulemenu,
        (
            {
                "label": "Step",
                "underline": 1,
                "command": self.step,
                "accelerator": "Space",
            },
            None,
            {
                "label": "Shift",
                "underline": 0,
                "command": self.shift,
                "accelerator": "Ctrl-s",
            },
            {
                "label": "Reduce",
                "underline": 0,
                "command": self.reduce,
                "accelerator": "Ctrl-r",
            },
            None,
            {
                "label": "Undo",
                "underline": 0,
                "command": self.undo,
                "accelerator": "Ctrl-u",
            },
        ),
    )
    menubar.add_cascade(label="Apply", underline=0, menu=rulemenu)

    viewmenu = Menu(menubar, tearoff=0)
    viewmenu.add_checkbutton(
        label="Show Grammar",
        underline=0,
        variable=self._show_grammar,
        command=self._toggle_grammar,
    )
    viewmenu.add_separator()
    # Font sizes, all driven by the shared ``self._size`` variable.
    for label, value in (
        ("Tiny", 10),
        ("Small", 12),
        ("Medium", 14),
        ("Large", 18),
        ("Huge", 24),
    ):
        viewmenu.add_radiobutton(
            label=label,
            variable=self._size,
            underline=0,
            value=value,
            command=self.resize,
        )
    menubar.add_cascade(label="View", underline=0, menu=viewmenu)

    animatemenu = Menu(menubar, tearoff=0)
    animatemenu.add_radiobutton(
        label="No Animation", underline=0, variable=self._animate, value=0
    )
    # The accelerators match the keys bound for animation speed.
    for label, value, accelerator in (
        ("Slow Animation", 20, "-"),
        ("Normal Animation", 10, "="),
        ("Fast Animation", 4, "+"),
    ):
        animatemenu.add_radiobutton(
            label=label,
            underline=0,
            variable=self._animate,
            value=value,
            accelerator=accelerator,
        )
    menubar.add_cascade(label="Animate", underline=1, menu=animatemenu)

    helpmenu = Menu(menubar, tearoff=0)
    fill(
        helpmenu,
        (
            {"label": "About", "underline": 0, "command": self.about},
            {
                "label": "Instructions",
                "underline": 0,
                "command": self.help,
                "accelerator": "F1",
            },
        ),
    )
    menubar.add_cascade(label="Help", underline=0, menu=helpmenu)

    parent.config(menu=menubar)
def _init_feedback(self, parent):
    """Create the "Last Operation:" feedback strip at the bottom of
    ``parent``: a caption plus two labels inside a sunken frame."""
    feedbackframe = Frame(parent)
    self._feedbackframe = feedbackframe
    feedbackframe.pack(fill="x", side="bottom", padx=3, pady=3)

    caption = Label(feedbackframe, text="Last Operation:", font=self._font)
    self._lastoper_label = caption
    caption.pack(side="left")

    lastoperframe = Frame(feedbackframe, relief="sunken", border=1)
    lastoperframe.pack(fill="x", side="right", expand=1, padx=5)

    # Options the two operation labels have in common.
    shared = {"background": "#f0f0f0", "font": self._font}
    self._lastoper1 = Label(lastoperframe, foreground="#007070", **shared)
    self._lastoper2 = Label(
        lastoperframe, anchor="w", width=30, foreground="#004040", **shared
    )
    self._lastoper1.pack(side="left")
    self._lastoper2.pack(side="left", fill="x", expand=1)
def _init_canvas(self, parent):
    """
    Create the main canvas together with its fixed decorations: the
    title bar, the dashed line below it, the stack-top line and the
    "Stack" / "Remaining Text" labels.  Their final geometry is set
    later by ``_redraw``.

    :param parent: The widget the canvas frame is packed into.
    """
    self._cframe = CanvasFrame(
        parent,
        background="white",
        width=525,
        closeenough=10,
        border=2,
        relief="sunken",
    )
    self._cframe.pack(expand=1, fill="both", side="top", pady=2)
    canvas = self._canvas = self._cframe.canvas()

    # Widgets currently shown for the stack and for the remaining text.
    self._stackwidgets = []
    self._rtextwidgets = []

    # Placeholder items; all coordinates start at zero.
    self._titlebar = canvas.create_rectangle(
        0, 0, 0, 0, fill="#c0f0f0", outline="black"
    )
    self._exprline = canvas.create_line(0, 0, 0, 0, dash=".")
    self._stacktop = canvas.create_line(0, 0, 0, 0, fill="#408080")

    self._stacklabel = TextWidget(
        canvas, "Stack", color="#004040", font=self._boldfont
    )
    self._rtextlabel = TextWidget(
        canvas, "Remaining Text", color="#004040", font=self._boldfont
    )
    self._cframe.add_widget(self._stacklabel)
    self._cframe.add_widget(self._rtextlabel)
#########################################
## Main draw procedure
#########################################
def _redraw(self):
    """
    Rebuild the canvas from the parser's current state: destroy and
    recreate the widgets for the stack and for the remaining text,
    reposition the title bar, labels and stack-top line, and refresh
    the highlighted productions.
    """
    # NOTE(review): block nesting was reconstructed from an
    # indentation-stripped listing -- confirm against the original file.
    scrollregion = self._canvas["scrollregion"].split()
    (cx1, cy1, cx2, cy2) = (int(c) for c in scrollregion)
    # Delete the old stack & rtext widgets.
    for stackwidget in self._stackwidgets:
        self._cframe.destroy_widget(stackwidget)
    self._stackwidgets = []
    for rtextwidget in self._rtextwidgets:
        self._cframe.destroy_widget(rtextwidget)
    self._rtextwidgets = []
    # Position the titlebar & exprline
    (x1, y1, x2, y2) = self._stacklabel.bbox()
    # y: height of the "Stack" label plus padding; tokens are drawn at this y.
    y = y2 - y1 + 10
    self._canvas.coords(self._titlebar, -5000, 0, 5000, y - 4)
    self._canvas.coords(self._exprline, 0, y * 2 - 10, 5000, y * 2 - 10)
    # Position the titlebar labels..
    (x1, y1, x2, y2) = self._stacklabel.bbox()
    self._stacklabel.move(5 - x1, 3 - y1)
    (x1, y1, x2, y2) = self._rtextlabel.bbox()
    self._rtextlabel.move(cx2 - x2 - 5, 3 - y1)
    # Draw the stack.
    stackx = 5
    for tok in self._parser.stack():
        if isinstance(tok, Tree):
            attribs = {
                "tree_color": "#4080a0",
                "tree_width": 2,
                "node_font": self._boldfont,
                "node_color": "#006060",
                "leaf_color": "#006060",
                "leaf_font": self._font,
            }
            widget = tree_to_treesegment(self._canvas, tok, **attribs)
            widget.label()["color"] = "#000000"
        else:
            widget = TextWidget(self._canvas, tok, color="#000000", font=self._font)
        # Clicking a stack element pops up the menu of reductions.
        widget.bind_click(self._popup_reduce)
        self._stackwidgets.append(widget)
        self._cframe.add_widget(widget, stackx, y)
        # The next element starts 10 units right of this one.
        stackx = widget.bbox()[2] + 10
    # Draw the remaining text.
    rtextwidth = 0
    for tok in self._parser.remaining_text():
        widget = TextWidget(self._canvas, tok, color="#000000", font=self._font)
        self._rtextwidgets.append(widget)
        self._cframe.add_widget(widget, rtextwidth, y)
        rtextwidth = widget.bbox()[2] + 4
    # Allow enough room to shift the next token (for animations)
    if len(self._rtextwidgets) > 0:
        stackx += self._rtextwidgets[0].width()
    # Move the remaining text to the correct location (keep it
    # right-justified, when possible); and move the remaining text
    # label, if necessary.
    stackx = max(stackx, self._stacklabel.width() + 25)
    rlabelwidth = self._rtextlabel.width() + 10
    if stackx >= cx2 - max(rtextwidth, rlabelwidth):
        # Not enough room: extend the right edge past the stack.
        cx2 = stackx + max(rtextwidth, rlabelwidth)
    for rtextwidget in self._rtextwidgets:
        rtextwidget.move(4 + cx2 - rtextwidth, 0)
    self._rtextlabel.move(cx2 - self._rtextlabel.bbox()[2] - 5, 0)
    # The stack-top line sits midway between the stack and the remaining text.
    midx = (stackx + cx2 - max(rtextwidth, rlabelwidth)) / 2
    self._canvas.coords(self._stacktop, midx, 0, midx, 5000)
    (x1, y1, x2, y2) = self._stacklabel.bbox()
    # Set up binding to allow them to shift a token by dragging it.
    if len(self._rtextwidgets) > 0:

        def drag_shift(widget, midx=midx, self=self):
            # Dropped left of the stack-top line: shift; otherwise snap back.
            if widget.bbox()[0] < midx:
                self.shift()
            else:
                self._redraw()

        self._rtextwidgets[0].bind_drag(drag_shift)
        self._rtextwidgets[0].bind_click(self.shift)
    # Draw the stack top.
    self._highlight_productions()
def _draw_stack_top(self, widget):
# hack..
midx = widget.bbox()[2] + 50
self._canvas.coords(self._stacktop, midx, 0, midx, 5000)
def _highlight_productions(self):
# Highlight the productions that can be reduced.
self._prodlist.selection_clear(0, "end")
for prod in self._parser.reducible_productions():
index = self._productions.index(prod)
self._prodlist.selection_set(index)
#########################################
## Button Callbacks
#########################################
def destroy(self, *e):
if self._top is None:
return
self._top.destroy()
self._top = None
def reset(self, *e):
self._parser.initialize(self._sent)
self._lastoper1["text"] = "Reset App"
self._lastoper2["text"] = ""
self._redraw()
def step(self, *e):
if self.reduce():
return True
elif self.shift():
return True
else:
if list(self._parser.parses()):
self._lastoper1["text"] = "Finished:"
self._lastoper2["text"] = "Success"
else:
self._lastoper1["text"] = "Finished:"
self._lastoper2["text"] = "Failure"
def shift(self, *e):
if self._animating_lock:
return
if self._parser.shift():
tok = self._parser.stack()[-1]
self._lastoper1["text"] = "Shift:"
self._lastoper2["text"] = "%r" % tok
if self._animate.get():
self._animate_shift()
else:
self._redraw()
return True
return False
def reduce(self, *e):
if self._animating_lock:
return
production = self._parser.reduce()
if production:
self._lastoper1["text"] = "Reduce:"
self._lastoper2["text"] = "%s" % production
if self._animate.get():
self._animate_reduce()
else:
self._redraw()
return production
def undo(self, *e):
if self._animating_lock:
return
if self._parser.undo():
self._redraw()
def postscript(self, *e):
self._cframe.print_to_file()
def mainloop(self, *args, **kwargs):
    """
    Enter the Tkinter mainloop.  This function must be called if
    this demo is created from a non-interactive program (e.g.
    from a script); otherwise, the demo will close as soon as
    the script completes.

    Nothing is done when running inside IDLE.
    """
    if not in_idle():
        self._top.mainloop(*args, **kwargs)
#########################################
## Menubar callbacks
#########################################
def resize(self, size=None):
if size is not None:
self._size.set(size)
size = self._size.get()
self._font.configure(size=-(abs(size)))
self._boldfont.configure(size=-(abs(size)))
self._sysfont.configure(size=-(abs(size)))
# self._stacklabel['font'] = ('helvetica', -size-4, 'bold')
# self._rtextlabel['font'] = ('helvetica', -size-4, 'bold')
# self._lastoper_label['font'] = ('helvetica', -size)
# self._lastoper1['font'] = ('helvetica', -size)
# self._lastoper2['font'] = ('helvetica', -size)
# self._prodlist['font'] = ('helvetica', -size)
# self._prodlist_label['font'] = ('helvetica', -size-2, 'bold')
self._redraw()
def help(self, *e):
    """Show this module's docstring in a help window."""
    title = "Help: Shift-Reduce Parser Application"
    text = (__doc__ or "").strip()
    # The default font's not very legible; try using 'fixed' instead.
    try:
        ShowText(self._top, title, text, width=75, font="fixed")
    except Exception:
        # Fall back to the default font.  ``Exception`` (rather than a
        # bare ``except``) lets KeyboardInterrupt/SystemExit propagate.
        ShowText(self._top, title, text, width=75)
def about(self, *e):
    """Show the "About" message, in a Tk message box when possible and
    in a plain text window otherwise."""
    about_text = "NLTK Shift-Reduce Parser Application\n" + "Written by Edward Loper"
    title = "About: Shift-Reduce Parser Application"
    try:
        from tkinter.messagebox import Message

        Message(message=about_text, title=title).show()
    except Exception:
        # Best-effort fallback.  ``Exception`` (rather than a bare
        # ``except``) lets KeyboardInterrupt/SystemExit propagate.
        ShowText(self._top, title, about_text)
def edit_grammar(self, *e):
    """Open a grammar editor on the parser's grammar; the editor is
    given ``set_grammar`` as its callback."""
    owner = self._top
    current_grammar = self._parser.grammar()
    CFGEditor(owner, current_grammar, self.set_grammar)
def set_grammar(self, grammar):
self._parser.set_grammar(grammar)
self._productions = list(grammar.productions())
self._prodlist.delete(0, "end")
for production in self._productions:
self._prodlist.insert("end", (" %s" % production))
def edit_sentence(self, *e):
    """Open a dialog pre-filled with the current text; the dialog is
    given ``set_sentence`` as its callback."""
    current_text = " ".join(self._sent)
    EntryDialog(
        self._top,
        current_text,
        "Enter a new sentence to parse.",
        self.set_sentence,
        "Edit Text",
    )
def set_sentence(self, sent):
self._sent = sent.split() # [XX] use tagged?
self.reset()
#########################################
## Reduce Production Selection
#########################################
def _toggle_grammar(self, *e):
if self._show_grammar.get():
self._prodframe.pack(
fill="both", side="left", padx=2, after=self._feedbackframe
)
self._lastoper1["text"] = "Show Grammar"
else:
self._prodframe.pack_forget()
self._lastoper1["text"] = "Hide Grammar"
self._lastoper2["text"] = ""
def _prodlist_select(self, event):
selection = self._prodlist.curselection()
if len(selection) != 1:
return
index = int(selection[0])
production = self._parser.reduce(self._productions[index])
if production:
self._lastoper1["text"] = "Reduce:"
self._lastoper2["text"] = "%s" % production
if self._animate.get():
self._animate_reduce()
else:
self._redraw()
else:
# Reset the production selections.
self._prodlist.selection_clear(0, "end")
for prod in self._parser.reducible_productions():
index = self._productions.index(prod)
self._prodlist.selection_set(index)
def _popup_reduce(self, widget):
# Remove old commands.
productions = self._parser.reducible_productions()
if len(productions) == 0:
return
self._reduce_menu.delete(0, "end")
for production in productions:
self._reduce_menu.add_command(label=str(production), command=self.reduce)
self._reduce_menu.post(
self._canvas.winfo_pointerx(), self._canvas.winfo_pointery()
)
#########################################
## Animations
#########################################
def _animate_shift(self):
    """
    Start animating the first remaining-text widget toward the stack.

    The widget slides horizontally from its current left edge to the
    next free stack position: x=5 when the stack is empty, otherwise 10
    units past the right edge of the last stack widget.  The animation
    setting is used as the number of frames.
    """
    # What widget are we shifting?
    moving = self._rtextwidgets[0]

    # Where are we shifting from & to?
    start_x = moving.bbox()[0]
    if self._stackwidgets:
        target_x = self._stackwidgets[-1].bbox()[2] + 10
    else:
        target_x = 5

    # Start animating.
    frames = self._animate.get()
    step = (target_x - start_x) * 1.0 / frames
    self._animate_shift_frame(frames, moving, step)
def _animate_shift_frame(self, frame, widget, dx):
    """
    Run one frame of the shift animation, or finish it.

    While ``frame`` is positive the widget is moved by ``dx`` and the
    next frame is scheduled 10 ms later.  On the final call the widget is
    transferred from the remaining-text list to the stack list, the
    animation flag is cleared, and the stack top and production
    highlighting are refreshed.

    :param frame: Number of frames still to run.
    :param widget: The widget being shifted.
    :param dx: Horizontal distance to move per frame.
    """
    if frame > 0:
        self._animating_lock = 1
        widget.move(dx, 0)
        self._top.after(10, self._animate_shift_frame, frame - 1, widget, dx)
        return

    # Animation finished: shift the widget to the stack.
    self._rtextwidgets.pop(0)
    self._stackwidgets.append(widget)
    self._animating_lock = 0

    # Display the available productions.
    self._draw_stack_top(widget)
    self._highlight_productions()
def _animate_reduce(self):
    """
    Start animating the stack widgets consumed by the latest reduce.

    The widgets for the children of the tree on top of the parser stack
    are moved downward by the height of the first widget (or of its
    label, for a tree segment) plus 15 units, leaving room for the new
    parent label that ``_animate_reduce_frame`` adds on its final call.

    Half of the animation setting is used as the frame count.  The count
    is kept as an integer of at least 1, and the per-frame step is
    derived from it, so the widgets travel exactly the intended distance
    for any setting, including odd values.
    """
    # What widgets are we shifting?
    child_count = len(self._parser.stack()[-1])  # number of children
    widgets = self._stackwidgets[-child_count:]

    # How far are we moving?
    first = widgets[0]
    if isinstance(first, TreeSegmentWidget):
        ydist = 15 + first.label().height()
    else:
        ydist = 15 + first.height()

    # Start animating.  Floor division keeps the frame counter integral
    # (true division would hand a float such as 2.5 to the frame loop and
    # overshoot), and max() guards against a zero frame count.
    dt = self._animate.get()
    frames = max(1, int(dt) // 2)
    dy = float(ydist) / frames
    self._animate_reduce_frame(frames, widgets, dy)
def _animate_reduce_frame(self, frame, widgets, dy):
    """
    Run one frame of the reduce animation, or finish it.

    While ``frame`` is positive every widget is moved down by ``dy`` and
    the next frame is scheduled 10 ms later.  On the final call the
    moved widgets are taken off the stack display and re-parented under
    a new tree-segment widget labelled with the node on top of the
    parser stack; the stack top and production highlighting are then
    refreshed.

    :param frame: Number of frames still to run.
    :param widgets: The stack widgets being combined by the reduce.
    :param dy: Vertical distance to move per frame.
    :raise ValueError: If the top of the parser stack is not a ``Tree``.
        The check runs before any widget is touched, so the display is
        left as it was.
    """
    if frame > 0:
        self._animating_lock = 1
        for widget in widgets:
            widget.move(0, dy)
        self._top.after(10, self._animate_reduce_frame, frame - 1, widgets, dy)
        return

    try:
        # Validate first so that a failure does not leave the stack
        # display half dismantled.
        tok = self._parser.stack()[-1]
        if not isinstance(tok, Tree):
            raise ValueError(
                "expected a Tree on top of the parser stack after a "
                "reduce, got %r" % (tok,)
            )

        # Detach the child widgets from the stack display.
        del self._stackwidgets[-len(widgets) :]
        for widget in widgets:
            self._cframe.remove_widget(widget)

        # Build the parent segment that adopts them.
        label = TextWidget(
            self._canvas, str(tok.label()), color="#006060", font=self._boldfont
        )
        widget = TreeSegmentWidget(self._canvas, label, widgets, width=2)

        # Place it just below the stack label, after the last stack widget.
        (_, top, _, bottom) = self._stacklabel.bbox()
        y = bottom - top + 10
        if not self._stackwidgets:
            x = 5
        else:
            x = self._stackwidgets[-1].bbox()[2] + 10
        self._cframe.add_widget(widget, x, y)
        self._stackwidgets.append(widget)

        # Display the available productions.
        self._draw_stack_top(widget)
        self._highlight_productions()
    finally:
        # Always clear the animation-in-progress flag, even on error, so
        # a failed reduce cannot leave it set.
        self._animating_lock = 0
#########################################
## Hovering.
#########################################
def _highlight_hover(self, event):
    """
    Highlight the stack widgets that the hovered production would reduce.

    The production row nearest to the pointer is looked up; if it is the
    row already recorded in ``self._hover`` nothing changes.  Otherwise
    the previous highlighting is cleared and, when the row is among the
    selected rows of the production list, the stack widgets matching the
    production's right-hand side are coloured green.

    A production with an empty right-hand side covers no stack widgets,
    so nothing is highlighted for it.

    :param event: The pointer event; only its ``y`` coordinate is used.
    """
    # What production are we hovering over?
    index = self._prodlist.nearest(event.y)
    if self._hover == index:
        return

    # Clear any previous hover highlighting.
    self._clear_hover()

    # If the production corresponds to an available reduction,
    # highlight the stack.
    selection = [int(s) for s in self._prodlist.curselection()]
    if index in selection:
        rhslen = len(self._productions[index].rhs())
        # ``seq[-0:]`` is the whole sequence, so a zero-length right-hand
        # side has to be special-cased to mean "no widgets".
        covered = self._stackwidgets[-rhslen:] if rhslen else []
        for stackwidget in covered:
            if isinstance(stackwidget, TreeSegmentWidget):
                stackwidget.label()["color"] = "#00a000"
            else:
                stackwidget["color"] = "#00a000"

    # Remember what production we're hovering over.
    self._hover = index
def _clear_hover(self, *event):
    """
    Remove hover highlighting from the stack widgets.

    Does nothing when no production is recorded as hovered
    (``self._hover == -1``).  Otherwise ``self._hover`` is reset to -1
    and every stack widget (or its label, for a tree segment) is
    coloured black again.

    :param event: Ignored; lets the method be used directly as an event
        callback.
    """
    if self._hover != -1:
        self._hover = -1
        for stackwidget in self._stackwidgets:
            if isinstance(stackwidget, TreeSegmentWidget):
                target = stackwidget.label()
            else:
                target = stackwidget
            target["color"] = "black"
def app():
    """
    Create a shift reduce parser app, using a simple grammar and
    text.
    """
    from nltk.grammar import CFG, Nonterminal, Production

    # Nonterminal symbols of the demo grammar.  ``Name`` is created but
    # not used by any production below.
    S, VP, NP, PP, P, N, Name, V, Det = map(
        Nonterminal, "S VP NP PP P N Name V Det".split()
    )

    # Syntactic productions: (left-hand side, right-hand side symbols).
    syntactic_rules = (
        (S, [NP, VP]),
        (NP, [Det, N]),
        (NP, [NP, PP]),
        (VP, [VP, PP]),
        (VP, [V, NP, PP]),
        (VP, [V, NP]),
        (PP, [P, NP]),
    )

    # Lexical productions: (left-hand side, word).
    lexical_rules = (
        (NP, "I"),
        (Det, "the"),
        (Det, "a"),
        (N, "man"),
        (V, "saw"),
        (P, "in"),
        (P, "with"),
        (N, "park"),
        (N, "dog"),
        (N, "statue"),
        (Det, "my"),
    )

    productions = tuple(
        Production(lhs, rhs) for lhs, rhs in syntactic_rules
    ) + tuple(Production(lhs, [word]) for lhs, word in lexical_rules)

    grammar = CFG(S, productions)

    # tokenize the sentence
    tokens = "my dog saw a man in the park with a statue".split()

    ShiftReduceApp(grammar, tokens).mainloop()
# Launch the demo when this module is run as a script.
if __name__ == "__main__":
    app()

# Public API of this module.
__all__ = ["app"]

View File

@@ -0,0 +1,36 @@
# Natural Language Toolkit: Wordfreq Application
#
# Copyright (C) 2001-2025 NLTK Project
# Author: Sumukh Ghodke <sghodke@csse.unimelb.edu.au>
# URL: <https://www.nltk.org/>
# For license information, see LICENSE.TXT
from matplotlib import pylab
from nltk.corpus import gutenberg
from nltk.text import Text
def plot_word_freq_dist(text, num=50):
    """
    Plot the cumulative share of ``text`` covered by its most common samples.

    The ``num`` most frequent samples of ``text.vocab()`` are placed on
    the x axis in decreasing order of frequency; the y value at each
    sample is the percentage of all counted outcomes (``fd.N()``)
    accounted for by that sample and every more frequent one.  The plot
    is titled with ``text.name`` and shown with ``pylab.show()``.

    :param text: An object providing ``vocab()`` (a frequency
        distribution) and a ``name`` attribute.
    :param num: How many of the most common samples to plot.  Defaults
        to 50.
    """
    from itertools import accumulate

    fd = text.vocab()
    samples = [item for item, _ in fd.most_common(num)]
    counts = [fd[sample] for sample in samples]

    # A running total gives the cumulative counts in one pass; the grand
    # total is looked up once rather than once per sample.
    total = fd.N()
    values = [running * 100.0 / total for running in accumulate(counts)]

    pylab.title(text.name)
    pylab.xlabel("Samples")
    pylab.ylabel("Cumulative Percentage")
    pylab.plot(values)
    pylab.xticks(range(len(samples)), [str(s) for s in samples], rotation=90)
    pylab.show()
def app():
    """
    Plot the word frequency distribution of the Gutenberg corpus file
    ``melville-moby_dick.txt``.
    """
    moby_words = gutenberg.words("melville-moby_dick.txt")
    plot_word_freq_dist(Text(moby_words))
# Show the plot when this module is run as a script.
if __name__ == "__main__":
    app()

# Public API of this module.
__all__ = ["app"]

File diff suppressed because it is too large Load Diff