updates
This commit is contained in:
108
Backend/venv/lib/python3.12/site-packages/nltk/tabdata.py
Normal file
108
Backend/venv/lib/python3.12/site-packages/nltk/tabdata.py
Normal file
@@ -0,0 +1,108 @@
|
||||
# Natural Language Toolkit: Encode/Decode Data as Tab-files
|
||||
#
|
||||
# Copyright (C) 2001-2025 NLTK Project
|
||||
# Author: Eric Kafe <kafe.eric@gmail.com>
|
||||
# URL: <https://www.nltk.org/>
|
||||
# For license information, see LICENSE.TXT
|
||||
#
|
||||
|
||||
|
||||
def rm_nl(s):
    """Return *s* with one trailing newline removed, if present.

    Only a single final ``"\\n"`` is stripped; any other trailing
    characters (including further newlines before it) are kept.
    An empty *s* is returned unchanged instead of raising ``IndexError``.
    """
    # The truthiness guard keeps ``s[-1]`` from failing on empty input.
    if s and s[-1] == "\n":
        return s[:-1]
    return s
|
||||
|
||||
|
||||
class TabEncoder:
    """Encode simple containers of strings as newline/tab-delimited text."""

    def list2txt(self, s):
        """Return the items of *s* joined with newlines."""
        line_sep = "\n"
        return line_sep.join(s)

    def set2txt(self, s):
        """Return the members of *s* joined with newlines."""
        members = [*s]
        return self.list2txt(members)

    def tup2tab(self, tup):
        """Return the fields of *tup* joined with tab characters."""
        field_sep = "\t"
        return field_sep.join(tup)

    def tups2tab(self, x):
        """Return one tab-joined line per tuple in *x*, joined by newlines."""
        lines = [self.tup2tab(row) for row in x]
        return "\n".join(lines)

    def dict2tab(self, d):
        """Return *d* as text with one ``key<TAB>value`` line per item."""
        pairs = d.items()
        return self.tups2tab(pairs)

    def ivdict2tab(self, d):
        """Like ``dict2tab``, but each value is passed through ``str`` first.

        Intended for dictionaries with integer values.
        """
        rows = [(key, str(value)) for key, value in d.items()]
        return self.tups2tab(rows)
|
||||
|
||||
|
||||
class TabDecoder:
    """Decode newline/tab-delimited text lines back into simple containers.

    Each method taking *f* iterates over it and treats every item as one
    line of text (for example, an open text file).
    """

    def txt2list(self, f):
        """Return the lines of *f* as a list, trailing newlines removed."""
        return [rm_nl(line) for line in f]

    def txt2set(self, f):
        """Return the lines of *f* as a set, trailing newlines removed."""
        return {rm_nl(line) for line in f}

    def tab2tup(self, s):
        """Split *s* on tab characters and return the fields as a tuple."""
        fields = s.split("\t")
        return tuple(fields)

    def tab2tups(self, f):
        """Return a list with one tuple of tab-separated fields per line."""
        return [self.tab2tup(rm_nl(line)) for line in f]

    def tab2dict(self, f):
        """Return a dict built from two-field ``key<TAB>value`` lines."""
        return dict(self.tab2tups(f))

    def tab2ivdict(self, f):
        """Like ``tab2dict``, but each value is converted with ``int``.

        Produces an integer-value dictionary.
        """
        return {key: int(value) for key, value in self.tab2tups(f)}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Maxent data
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class MaxentEncoder(TabEncoder):
    """Tab encoder for dictionaries keyed by 3-tuples."""

    def tupdict2tab(self, d):
        """Encode *d*, a mapping ``(a, b, c) -> value``, as tab-delimited text.

        Each item becomes one line with four fields: ``a``, the encoded
        ``b``, ``c`` and ``repr(value)``.
        """

        def rep(name, value):
            # "wordlen" values are written via repr().
            if name == "wordlen":
                return repr(value)
            # Membership is tested by equality, so values equal to
            # True/False (e.g. 1 and 0) are also given the "repr-" prefix.
            if value in (True, False, None):
                return f"repr-{value}"
            return value

        rows = [
            (name, rep(name, value), label, repr(target))
            for (name, value, label), target in d.items()
        ]
        return self.tups2tab(rows)
|
||||
|
||||
|
||||
class MaxentDecoder(TabDecoder):
    """Tab decoder for dictionaries keyed by 3-tuples."""

    def tupkey2dict(self, f):
        """Decode four-field lines of *f* into a ``(a, b, c) -> int`` dict.

        The second field is decoded: for ``"wordlen"`` rows it is converted
        with ``int``; the markers ``"repr-None"``, ``"repr-True"`` and
        ``"repr-False"`` become ``None``, ``True`` and ``False``; any other
        string is kept as is. The fourth field is converted with ``int``.
        """
        markers = {
            "repr-None": None,
            "repr-True": True,
            "repr-False": False,
        }

        def rep(name, raw):
            if name == "wordlen":
                return int(raw)
            # Fall back to the raw string when it is not a known marker.
            return markers.get(raw, raw)

        return {
            (name, rep(name, raw), label): int(target)
            for name, raw, label, target in self.tab2tups(f)
        }
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Punkt data
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class PunktDecoder(TabDecoder):
    """Tab decoder producing integer-valued ``defaultdict`` objects."""

    def tab2intdict(self, f):
        """Decode two-field lines of *f* into a ``defaultdict(int)``.

        Each line supplies a key and a value; the value is converted
        with ``int``.
        """
        from collections import defaultdict

        counts = defaultdict(int)
        for key, value in self.tab2tups(f):
            counts[key] = int(value)
        return counts
|
||||
Reference in New Issue
Block a user