updates
This commit is contained in:
@@ -0,0 +1,67 @@
|
||||
# Natural Language Toolkit: Dispersion Plots
|
||||
#
|
||||
# Copyright (C) 2001-2025 NLTK Project
|
||||
# Author: Steven Bird <stevenbird1@gmail.com>
|
||||
# URL: <https://www.nltk.org/>
|
||||
# For license information, see LICENSE.TXT
|
||||
|
||||
"""
|
||||
A utility for displaying lexical dispersion.
|
||||
"""
|
||||
|
||||
|
||||
def dispersion_plot(text, words, ignore_case=False, title="Lexical Dispersion Plot"):
    """
    Generate a lexical dispersion plot.

    Every occurrence of a target word in ``text`` is drawn as a ``|`` marker
    at the token offset where it occurs.  Each target word gets its own row;
    the first word in ``words`` is assigned the highest y position.

    ``text`` is traversed exactly once, so one-shot iterators and generators
    are accepted as well as lists.

    :param text: The source text
    :type text: list(str) or iter(str)
    :param words: The target words
    :type words: list of str
    :param ignore_case: flag to set if case should be ignored when searching text
    :type ignore_case: bool
    :param title: the title set on the returned Axes
    :type title: str
    :return: a matplotlib Axes object that may still be modified before plotting
    :rtype: Axes
    :raises ImportError: if matplotlib cannot be imported
    """

    try:
        import matplotlib.pyplot as plt
    except ImportError as e:
        raise ImportError(
            "The plot function requires matplotlib to be installed. "
            "See https://matplotlib.org/"
        ) from e

    # Materialise the targets once so that any iterable (not only sliceable
    # sequences) can be reversed and measured.  Reversing puts the first
    # target word on the highest row.
    labels = list(words)[::-1]

    # Map each (optionally case-folded) target word to its row index.  If two
    # targets fold to the same key, the one that appears earlier in ``words``
    # wins because it is inserted last.
    word2y = {
        (word.casefold() if ignore_case else word): y
        for y, word in enumerate(labels)
    }

    xs, ys = [], []
    # Track the final token offset while iterating instead of calling
    # len(text) afterwards: len() is undefined for iterators, and an empty
    # text must not produce a negative upper bound.
    last_offset = 0
    for last_offset, token in enumerate(text):
        token = token.casefold() if ignore_case else token
        y = word2y.get(token)
        if y is not None:
            xs.append(last_offset)
            ys.append(y)

    _, ax = plt.subplots()
    ax.plot(xs, ys, "|")
    # Stretch the x data limits over the whole text, not just the matched
    # offsets, before letting matplotlib autoscale the axis.
    ax.dataLim.x0, ax.dataLim.x1 = 0, last_offset
    ax.autoscale(axis="x")
    ax.set_yticks(list(range(len(labels))), labels, color="C0")
    ax.set_ylim(-1, len(labels))
    ax.set_title(title)
    ax.set_xlabel("Word Offset")
    return ax
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Demo: show where four characters of "Sense and Sensibility" are
    # mentioned across the novel.
    import matplotlib.pyplot as plt

    from nltk.corpus import gutenberg

    words = [
        "Elinor",
        "Marianne",
        "Edward",
        "Willoughby",
    ]
    austen_tokens = gutenberg.words("austen-sense.txt")
    dispersion_plot(austen_tokens, words)
    plt.show()
|
||||
Reference in New Issue
Block a user