This commit is contained in:
Iliyan Angelov
2025-12-01 06:50:10 +02:00
parent 91f51bc6fe
commit 62c1fe5951
4682 changed files with 544807 additions and 31208 deletions


@@ -0,0 +1,35 @@
# Natural Language Toolkit: Twitter
#
# Copyright (C) 2001-2025 NLTK Project
# Author: Ewan Klein <ewan@inf.ed.ac.uk>
# URL: <https://www.nltk.org/>
# For license information, see LICENSE.TXT
"""
NLTK Twitter Package
This package contains classes for retrieving Tweet documents using the
Twitter API.
"""
try:
import twython
except ImportError:
import warnings
warnings.warn(
"The twython library has not been installed. "
"Some functionality from the twitter package will not be available."
)
else:
from nltk.twitter.util import Authenticate, credsfromfile
from nltk.twitter.twitterclient import (
Streamer,
Query,
Twitter,
TweetViewer,
TweetWriter,
)
from nltk.twitter.common import json2csv
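# Illustrative usage sketch (not executed on import): assuming twython is
# installed and valid credentials can be loaded with `credsfromfile()`, a
# minimal end-to-end run might look like:
#
#     from nltk.twitter import Twitter
#     tw = Twitter()
#     tw.tweets(keywords="nltk", stream=False, limit=10)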


@@ -0,0 +1,145 @@
# Natural Language Toolkit: Twitter API
#
# Copyright (C) 2001-2025 NLTK Project
# Author: Ewan Klein <ewan@inf.ed.ac.uk>
# Lorenzo Rubio <lrnzcig@gmail.com>
# URL: <https://www.nltk.org/>
# For license information, see LICENSE.TXT
"""
This module provides an interface for TweetHandlers, and support for timezone
handling.
"""
import time as _time
from abc import ABCMeta, abstractmethod
from datetime import datetime, timedelta, timezone, tzinfo
class LocalTimezoneOffsetWithUTC(tzinfo):
"""
This is not intended to be a general purpose class for dealing with the
local timezone. In particular:
* it assumes that the date passed has been created using
`datetime(..., tzinfo=Local)`, where `Local` is an instance of
the object `LocalTimezoneOffsetWithUTC`;
* for such an object, it returns the offset from UTC, used for date comparisons.
Reference: https://docs.python.org/3/library/datetime.html
"""
STDOFFSET = timedelta(seconds=-_time.timezone)
if _time.daylight:
DSTOFFSET = timedelta(seconds=-_time.altzone)
else:
DSTOFFSET = STDOFFSET
def utcoffset(self, dt):
"""
Access the relevant time offset.
"""
return self.DSTOFFSET
LOCAL = LocalTimezoneOffsetWithUTC()
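# Example (illustrative): the tuple-style date limits used elsewhere in this
# package are turned into aware datetimes with this offset, e.g.
#
#     cutoff = datetime(2015, 4, 1, 12, 40, tzinfo=LOCAL)
#
# which can then be compared directly against UTC tweet timestamps.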
class BasicTweetHandler(metaclass=ABCMeta):
"""
Minimal implementation of `TweetHandler`.
Counts the number of Tweets and decides when the client should stop
fetching them.
"""
def __init__(self, limit=20):
self.limit = limit
self.counter = 0
"""
A flag to indicate to the client whether to stop fetching data given
some condition (e.g., reaching a date limit).
"""
self.do_stop = False
"""
Stores the id of the last fetched Tweet to handle pagination.
"""
self.max_id = None
def do_continue(self):
"""
Returns `False` if the client should stop fetching Tweets.
"""
return self.counter < self.limit and not self.do_stop
class TweetHandlerI(BasicTweetHandler):
"""
Interface class whose subclasses should implement a handle method that
Twitter clients can delegate to.
"""
def __init__(self, limit=20, upper_date_limit=None, lower_date_limit=None):
"""
:param int limit: The number of data items to process in the current\
round of processing.
:param tuple upper_date_limit: The date at which to stop collecting\
new data. This should be entered as a tuple which can serve as the\
argument to `datetime.datetime`.\
E.g. `date_limit=(2015, 4, 1, 12, 40)` for 12:40 pm on April 1 2015.
:param tuple lower_date_limit: The date at which to stop collecting\
new data. See `upper_date_limit` for formatting.
"""
BasicTweetHandler.__init__(self, limit)
self.upper_date_limit = None
self.lower_date_limit = None
if upper_date_limit:
self.upper_date_limit = datetime(*upper_date_limit, tzinfo=LOCAL)
if lower_date_limit:
self.lower_date_limit = datetime(*lower_date_limit, tzinfo=LOCAL)
self.startingup = True
@abstractmethod
def handle(self, data):
"""
Deal appropriately with data returned by the Twitter API
"""
@abstractmethod
def on_finish(self):
"""
Actions when the tweet limit has been reached
"""
def check_date_limit(self, data, verbose=False):
"""
Validate date limits.
"""
if self.upper_date_limit or self.lower_date_limit:
date_fmt = "%a %b %d %H:%M:%S +0000 %Y"
tweet_date = datetime.strptime(data["created_at"], date_fmt).replace(
tzinfo=timezone.utc
)
if (self.upper_date_limit and tweet_date > self.upper_date_limit) or (
self.lower_date_limit and tweet_date < self.lower_date_limit
):
if self.upper_date_limit:
message = "earlier"
date_limit = self.upper_date_limit
else:
message = "later"
date_limit = self.lower_date_limit
if verbose:
print(
"Date limit {} is {} than date of current tweet {}".format(
date_limit, message, tweet_date
)
)
self.do_stop = True
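# Illustrative sketch (hypothetical subclass, not part of this module): a
# concrete handler only needs `handle` and `on_finish`, e.g.
#
#     class TweetCollector(TweetHandlerI):
#         """Collect tweet texts in memory."""
#         def __init__(self, limit=20):
#             TweetHandlerI.__init__(self, limit)
#             self.texts = []
#
#         def handle(self, data):
#             self.check_date_limit(data)
#             self.texts.append(data.get("text", ""))
#
#         def on_finish(self):
#             print(f"Collected {len(self.texts)} Tweets")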


@@ -0,0 +1,270 @@
# Natural Language Toolkit: Twitter client
#
# Copyright (C) 2001-2025 NLTK Project
# Author: Ewan Klein <ewan@inf.ed.ac.uk>
# Lorenzo Rubio <lrnzcig@gmail.com>
# URL: <https://www.nltk.org/>
# For license information, see LICENSE.TXT
"""
Utility functions for the `twitterclient` module which do not require
the `twython` library to have been installed.
"""
import csv
import gzip
import json
from nltk.internals import deprecated
HIER_SEPARATOR = "."
def extract_fields(tweet, fields):
"""
Extract field values from a full tweet and return them as a list
:param json tweet: The tweet in JSON format
:param list fields: The fields to be extracted from the tweet
:rtype: list(str)
"""
out = []
for field in fields:
try:
_add_field_to_out(tweet, field, out)
except TypeError as e:
raise RuntimeError(
f"Fatal error when extracting fields. Cannot find field {field}"
) from e
return out
def _add_field_to_out(json, field, out):
if _is_composed_key(field):
key, value = _get_key_value_composed(field)
_add_field_to_out(json[key], value, out)
else:
out += [json[field]]
def _is_composed_key(field):
return HIER_SEPARATOR in field
def _get_key_value_composed(field):
out = field.split(HIER_SEPARATOR)
# there could be up to 3 levels
key = out[0]
value = HIER_SEPARATOR.join(out[1:])
return key, value
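# For example, the composed field "user.urls.url" splits into
# ("user", "urls.url"); the remainder is resolved recursively.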
def _get_entity_recursive(json, entity):
if not json:
return None
elif isinstance(json, dict):
for key, value in json.items():
if key == entity:
return value
# 'entities' and 'extended_entities' are wrappers in Twitter json
# structure that contain other Twitter objects. See:
# https://dev.twitter.com/overview/api/entities-in-twitter-objects
if key == "entities" or key == "extended_entities":
candidate = _get_entity_recursive(value, entity)
if candidate is not None:
return candidate
return None
elif isinstance(json, list):
for item in json:
candidate = _get_entity_recursive(item, entity)
if candidate is not None:
return candidate
return None
else:
return None
def json2csv(
fp, outfile, fields, encoding="utf8", errors="replace", gzip_compress=False
):
"""
Extract selected fields from a file of line-separated JSON tweets and
write to a file in CSV format.
This utility function allows a file of full tweets to be easily converted
to a CSV file for easier processing. For example, just TweetIDs or
just the text content of the Tweets can be extracted.
Additionally, the function allows combinations of fields of other Twitter
objects (mainly the users, see below).
For Twitter entities (e.g. hashtags of a Tweet), and for geolocation, see
`json2csv_entities`
:param fp: A file-like object of line-delimited JSON Tweets
:param str outfile: The name of the text file where results should be\
written
:param list fields: The list of fields to be extracted. Useful examples\
are 'id_str' for the tweetID and 'text' for the text of the tweet. See\
<https://dev.twitter.com/overview/api/tweets> for a full list of fields.\
e.g. ['id_str'], ['id', 'text', 'favorite_count', 'retweet_count']\
Additionally, it allows IDs from other Twitter objects, e.g.,\
['id', 'text', 'user.id', 'user.followers_count', 'user.friends_count']
:param errors: Behaviour for encoding errors, see\
https://docs.python.org/3/library/codecs.html#codec-base-classes
:param gzip_compress: if `True`, output files are compressed with gzip
"""
(writer, outf) = _outf_writer(outfile, encoding, errors, gzip_compress)
# write the list of fields as header
writer.writerow(fields)
# process the file
for line in fp:
tweet = json.loads(line)
row = extract_fields(tweet, fields)
writer.writerow(row)
outf.close()
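# Illustrative usage sketch (file names are hypothetical): convert a file of
# line-delimited JSON Tweets into a CSV of selected fields:
#
#     with open("tweets.json") as fp:
#         json2csv(fp, "tweets.csv", ["id_str", "text", "user.screen_name"])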
@deprecated("Use open() and csv.writer() directly instead.")
def outf_writer_compat(outfile, encoding, errors, gzip_compress=False):
"""Get a CSV writer with optional compression."""
return _outf_writer(outfile, encoding, errors, gzip_compress)
def _outf_writer(outfile, encoding, errors, gzip_compress=False):
if gzip_compress:
outf = gzip.open(outfile, "wt", newline="", encoding=encoding, errors=errors)
else:
outf = open(outfile, "w", newline="", encoding=encoding, errors=errors)
writer = csv.writer(outf)
return (writer, outf)
def json2csv_entities(
tweets_file,
outfile,
main_fields,
entity_type,
entity_fields,
encoding="utf8",
errors="replace",
gzip_compress=False,
):
"""
Extract selected fields from a file of line-separated JSON tweets and
write to a file in CSV format.
This utility function allows a file of full Tweets to be easily converted
to a CSV file for easier processing of Twitter entities. For example, the
hashtags or media elements of a tweet can be extracted.
It returns one line per entity of a Tweet, e.g. if a tweet has two hashtags
there will be two lines in the output file, one per hashtag.
:param tweets_file: the file-like object containing full Tweets
:param str outfile: The path of the text file where results should be\
written
:param list main_fields: The list of fields to be extracted from the main\
object, usually the tweet. Useful examples: 'id_str' for the tweetID. See\
<https://dev.twitter.com/overview/api/tweets> for a full list of fields.
e.g. ['id_str'], ['id', 'text', 'favorite_count', 'retweet_count']
If `entity_type` is expressed with hierarchy, then it is the list of\
fields of the object that corresponds to the key of the entity_type,\
(e.g., for entity_type='user.urls', the fields in the main_fields list\
belong to the user object; for entity_type='place.bounding_box', the\
fields in the main_fields list belong to the place object of the tweet).
:param str entity_type: The name of the entity: 'hashtags', 'media',\
'urls' and 'user_mentions' for the tweet object. For a user object,\
this needs to be expressed with a hierarchy: `'user.urls'`. For the\
bounding box of the Tweet location, use `'place.bounding_box'`.
:param list entity_fields: The list of fields to be extracted from the\
entity. E.g. `['text']` (of the Tweet)
:param errors: Behaviour for encoding errors, see\
https://docs.python.org/3/library/codecs.html#codec-base-classes
:param gzip_compress: if `True`, output files are compressed with gzip
"""
(writer, outf) = _outf_writer(outfile, encoding, errors, gzip_compress)
header = get_header_field_list(main_fields, entity_type, entity_fields)
writer.writerow(header)
for line in tweets_file:
tweet = json.loads(line)
if _is_composed_key(entity_type):
key, value = _get_key_value_composed(entity_type)
object_json = _get_entity_recursive(tweet, key)
if not object_json:
# this can happen in the case of "place"
continue
object_fields = extract_fields(object_json, main_fields)
items = _get_entity_recursive(object_json, value)
_write_to_file(object_fields, items, entity_fields, writer)
else:
tweet_fields = extract_fields(tweet, main_fields)
items = _get_entity_recursive(tweet, entity_type)
_write_to_file(tweet_fields, items, entity_fields, writer)
outf.close()
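# Illustrative usage sketch (file names are hypothetical): write one row per
# hashtag, keyed by the tweet ID:
#
#     with open("tweets.json") as fp:
#         json2csv_entities(fp, "hashtags.csv", ["id_str"], "hashtags", ["text"])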
def get_header_field_list(main_fields, entity_type, entity_fields):
if _is_composed_key(entity_type):
key, value = _get_key_value_composed(entity_type)
main_entity = key
sub_entity = value
else:
main_entity = None
sub_entity = entity_type
if main_entity:
output1 = [HIER_SEPARATOR.join([main_entity, x]) for x in main_fields]
else:
output1 = main_fields
output2 = [HIER_SEPARATOR.join([sub_entity, x]) for x in entity_fields]
return output1 + output2
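# For example:
#     get_header_field_list(["id_str"], "hashtags", ["text"])
#         -> ["id_str", "hashtags.text"]
#     get_header_field_list(["id"], "user.urls", ["url"])
#         -> ["user.id", "urls.url"]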
def _write_to_file(object_fields, items, entity_fields, writer):
if not items:
# the entity may simply be absent for this tweet:
# e.g. 'hashtags' is always present (possibly as []), whereas
# 'media' may be missing altogether
return
if isinstance(items, dict):
# this happens e.g. for "place" of a tweet
row = object_fields
# there might be composed keys in the list of required fields
entity_field_values = [x for x in entity_fields if not _is_composed_key(x)]
entity_field_composed = [x for x in entity_fields if _is_composed_key(x)]
for field in entity_field_values:
value = items[field]
if isinstance(value, list):
row += value
else:
row += [value]
# now check required dictionaries
for d in entity_field_composed:
kd, vd = _get_key_value_composed(d)
json_dict = items[kd]
if not isinstance(json_dict, dict):
raise RuntimeError(
f"Key {kd} does not contain a dictionary in the json file"
)
row += [json_dict[vd]]
writer.writerow(row)
return
# in general it is a list
for item in items:
row = object_fields + extract_fields(item, entity_fields)
writer.writerow(row)


@@ -0,0 +1,306 @@
# Natural Language Toolkit: Twitter client
#
# Copyright (C) 2001-2025 NLTK Project
# Author: Ewan Klein <ewan@inf.ed.ac.uk>
# Lorenzo Rubio <lrnzcig@gmail.com>
# URL: <https://www.nltk.org/>
# For license information, see LICENSE.TXT
"""
Examples to demo the :py:mod:`twitterclient` code.
These demo functions should all run, with the following caveats:
* You must have obtained API keys from Twitter, and installed them according to
the instructions in the `twitter HOWTO <https://www.nltk.org/howto/twitter.html>`_.
* If you are on a slow network, some of the calls to the Twitter API may
timeout.
* If you are being rate limited while searching, you will receive a 420
error response.
* Your terminal window / console must be able to display UTF-8 encoded characters.
For documentation about the Twitter APIs, see `The Streaming APIs Overview
<https://dev.twitter.com/streaming/overview>`_ and `The REST APIs Overview
<https://dev.twitter.com/rest/public>`_.
For error codes see Twitter's
`Error Codes and Responses <https://dev.twitter.com/overview/api/response-codes>`
"""
import datetime
import json
from functools import wraps
from io import StringIO
from nltk.twitter import (
Query,
Streamer,
TweetViewer,
TweetWriter,
Twitter,
credsfromfile,
)
SPACER = "###################################"
def verbose(func):
"""Decorator for demo functions"""
@wraps(func)
def with_formatting(*args, **kwargs):
print()
print(SPACER)
print("Using %s" % (func.__name__))
print(SPACER)
return func(*args, **kwargs)
return with_formatting
def yesterday():
"""
Get yesterday's datetime as a 6-tuple (year, month, day, hour, minute, second).
"""
date = datetime.datetime.now()
date -= datetime.timedelta(days=1)
date_tuple = date.timetuple()[:6]
return date_tuple
def setup():
"""
Initialize global variables for the demos.
"""
global USERIDS, FIELDS
USERIDS = ["759251", "612473", "15108702", "6017542", "2673523800"]
# UserIDs corresponding to @CNN, @BBCNews, @ReutersLive, @BreakingNews, @AJELive
FIELDS = ["id_str"]
@verbose
def twitterclass_demo():
"""
Use the simplified :class:`Twitter` class to write some tweets to a file.
"""
tw = Twitter()
print("Track from the public stream\n")
tw.tweets(keywords="love, hate", limit=10) # public stream
print(SPACER)
print("Search past Tweets\n")
tw = Twitter()
tw.tweets(keywords="love, hate", stream=False, limit=10) # search past tweets
print(SPACER)
print(
"Follow two accounts in the public stream"
+ " -- be prepared to wait a few minutes\n"
)
tw = Twitter()
tw.tweets(follow=["759251", "6017542"], stream=True, limit=5) # public stream
@verbose
def sampletoscreen_demo(limit=20):
"""
Sample from the Streaming API and send output to terminal.
"""
oauth = credsfromfile()
client = Streamer(**oauth)
client.register(TweetViewer(limit=limit))
client.sample()
@verbose
def tracktoscreen_demo(track="taylor swift", limit=10):
"""
Track keywords from the public Streaming API and send output to terminal.
"""
oauth = credsfromfile()
client = Streamer(**oauth)
client.register(TweetViewer(limit=limit))
client.filter(track=track)
@verbose
def search_demo(keywords="nltk"):
"""
Use the REST API to search for past tweets containing a given keyword.
"""
oauth = credsfromfile()
client = Query(**oauth)
for tweet in client.search_tweets(keywords=keywords, limit=10):
print(tweet["text"])
@verbose
def tweets_by_user_demo(user="NLTK_org", count=200):
"""
Use the REST API to search for past tweets by a given user.
"""
oauth = credsfromfile()
client = Query(**oauth)
client.register(TweetWriter())
client.user_tweets(user, count)
@verbose
def lookup_by_userid_demo():
"""
Use the REST API to convert a userID to a screen name.
"""
oauth = credsfromfile()
client = Query(**oauth)
user_info = client.user_info_from_id(USERIDS)
for info in user_info:
name = info["screen_name"]
followers = info["followers_count"]
following = info["friends_count"]
print(f"{name}, followers: {followers}, following: {following}")
@verbose
def followtoscreen_demo(limit=10):
"""
Using the Streaming API, select just the tweets from a specified list of
userIDs.
This will only give results in a reasonable time if the users in
question produce a high volume of Tweets, and even then may show some delay.
"""
oauth = credsfromfile()
client = Streamer(**oauth)
client.register(TweetViewer(limit=limit))
client.statuses.filter(follow=USERIDS)
@verbose
def streamtofile_demo(limit=20):
"""
Write 20 tweets sampled from the public Streaming API to a file.
"""
oauth = credsfromfile()
client = Streamer(**oauth)
client.register(TweetWriter(limit=limit, repeat=False))
client.statuses.sample()
@verbose
def limit_by_time_demo(keywords="nltk"):
"""
Query the REST API for Tweets about NLTK since yesterday and send
the output to terminal.
This example makes the assumption that there are sufficient Tweets since
yesterday for the date to be an effective cut-off.
"""
date = yesterday()
dt_date = datetime.datetime(*date)
oauth = credsfromfile()
client = Query(**oauth)
client.register(TweetViewer(limit=100, lower_date_limit=date))
print(f"Cutoff date: {dt_date}\n")
for tweet in client.search_tweets(keywords=keywords):
print("{} ".format(tweet["created_at"]), end="")
client.handler.handle(tweet)
@verbose
def corpusreader_demo():
"""
Use `TwitterCorpusReader` to read a file of tweets, and print out
* some full tweets in JSON format;
* some raw strings from the tweets (i.e., the value of the `text` field); and
* the result of tokenising the raw strings.
"""
from nltk.corpus import twitter_samples as tweets
print()
print("Complete tweet documents")
print(SPACER)
for tweet in tweets.docs("tweets.20150430-223406.json")[:1]:
print(json.dumps(tweet, indent=1, sort_keys=True))
print()
print("Raw tweet strings:")
print(SPACER)
for text in tweets.strings("tweets.20150430-223406.json")[:15]:
print(text)
print()
print("Tokenized tweet strings:")
print(SPACER)
for toks in tweets.tokenized("tweets.20150430-223406.json")[:15]:
print(toks)
@verbose
def expand_tweetids_demo():
"""
Given a file object containing a list of Tweet IDs, fetch the
corresponding full Tweets, if available.
"""
ids_f = StringIO(
"""\
588665495492124672
588665495487909888
588665495508766721
588665495513006080
588665495517200384
588665495487811584
588665495525588992
588665495487844352
588665495492014081
588665495512948737"""
)
oauth = credsfromfile()
client = Query(**oauth)
hydrated = client.expand_tweetids(ids_f)
for tweet in hydrated:
id_str = tweet["id_str"]
print(f"id: {id_str}")
text = tweet["text"]
if text.startswith("@null"):
text = "[Tweet not available]"
print(text + "\n")
ALL = [
twitterclass_demo,
sampletoscreen_demo,
tracktoscreen_demo,
search_demo,
tweets_by_user_demo,
lookup_by_userid_demo,
followtoscreen_demo,
streamtofile_demo,
limit_by_time_demo,
corpusreader_demo,
expand_tweetids_demo,
]
"""
Select demo functions to run. E.g. replace the following line with "DEMOS =
ALL[8:]" to execute only the final three demos.
"""
DEMOS = ALL[:]
if __name__ == "__main__":
setup()
for demo in DEMOS:
demo()
print("\n" + SPACER)
print("All demos completed")
print(SPACER)


@@ -0,0 +1,562 @@
# Natural Language Toolkit: Twitter client
#
# Copyright (C) 2001-2025 NLTK Project
# Author: Ewan Klein <ewan@inf.ed.ac.uk>
# Lorenzo Rubio <lrnzcig@gmail.com>
# URL: <https://www.nltk.org/>
# For license information, see LICENSE.TXT
"""
NLTK Twitter client
This module offers methods for collecting and processing Tweets. Most of the
functionality depends on access to the Twitter APIs, and this is handled via
the third party Twython library.
If one of the methods below returns an integer, it is probably a `Twitter
error code <https://dev.twitter.com/overview/api/response-codes>`_. For
example, the response of '420' means that you have reached the limit of the
requests you can currently make to the Twitter API. Currently, `rate limits
for the search API <https://dev.twitter.com/rest/public/rate-limiting>`_ are
divided into 15 minute windows.
"""
import datetime
import gzip
import itertools
import json
import os
import time
import requests
from twython import Twython, TwythonStreamer
from twython.exceptions import TwythonError, TwythonRateLimitError
from nltk.twitter.api import BasicTweetHandler, TweetHandlerI
from nltk.twitter.util import credsfromfile, guess_path
class Streamer(TwythonStreamer):
"""
Retrieve data from the Twitter Streaming API.
The streaming API requires
`OAuth 1.0 <https://en.wikipedia.org/wiki/OAuth>`_ authentication.
"""
def __init__(self, app_key, app_secret, oauth_token, oauth_token_secret):
self.handler = None
self.do_continue = True
TwythonStreamer.__init__(
self, app_key, app_secret, oauth_token, oauth_token_secret
)
def register(self, handler):
"""
Register a method for handling Tweets.
:param TweetHandlerI handler: method for viewing
"""
self.handler = handler
def on_success(self, data):
"""
:param data: response from Twitter API
"""
if self.do_continue:
if self.handler is not None:
if "text" in data:
self.handler.counter += 1
self.handler.handle(data)
self.do_continue = self.handler.do_continue()
else:
raise ValueError("No data handler has been registered.")
else:
self.disconnect()
self.handler.on_finish()
def on_error(self, status_code, data):
"""
:param status_code: The status code returned by the Twitter API
:param data: The response from Twitter API
"""
print(status_code)
def sample(self):
"""
Wrapper for 'statuses / sample' API call
"""
while self.do_continue:
# Stream in an endless loop until limit is reached. See twython
# issue 288: https://github.com/ryanmcgrath/twython/issues/288
# colditzjb commented on 9 Dec 2014
try:
self.statuses.sample()
except requests.exceptions.ChunkedEncodingError as e:
if e is not None:
print(f"Error (stream will continue): {e}")
continue
def filter(self, track="", follow="", lang="en"):
"""
Wrapper for 'statuses / filter' API call
"""
while self.do_continue:
# Stream in an endless loop until limit is reached
try:
if track == "" and follow == "":
msg = "Please supply a value for 'track' or 'follow'."
raise ValueError(msg)
self.statuses.filter(track=track, follow=follow, lang=lang)
except requests.exceptions.ChunkedEncodingError as e:
if e is not None:
print(f"Error (stream will continue): {e}")
continue
class Query(Twython):
"""
Retrieve data from the Twitter REST API.
"""
def __init__(self, app_key, app_secret, oauth_token, oauth_token_secret):
"""
:param app_key: (optional) Your application's key
:param app_secret: (optional) Your application's secret key
:param oauth_token: (optional) When using **OAuth 1**, combined with
oauth_token_secret to make authenticated calls
:param oauth_token_secret: (optional) When using **OAuth 1** combined
with oauth_token to make authenticated calls
"""
self.handler = None
self.do_continue = True
Twython.__init__(self, app_key, app_secret, oauth_token, oauth_token_secret)
def register(self, handler):
"""
Register a method for handling Tweets.
:param TweetHandlerI handler: method for viewing or writing Tweets to a file.
"""
self.handler = handler
def expand_tweetids(self, ids_f, verbose=True):
"""
Given a file object containing a list of Tweet IDs, fetch the
corresponding full Tweets from the Twitter API.
The API call `statuses/lookup` will fail to retrieve a Tweet if the
user has deleted it.
This call to the Twitter API is rate-limited. See
<https://dev.twitter.com/rest/reference/get/statuses/lookup> for details.
:param ids_f: input file object consisting of Tweet IDs, one to a line
:return: iterable of Tweet objects in JSON format
"""
ids = [line.strip() for line in ids_f if line]
if verbose:
print(f"Counted {len(ids)} Tweet IDs in {ids_f}.")
# The Twitter endpoint takes lists of up to 100 ids, so we chunk the
# ids.
id_chunks = [ids[i : i + 100] for i in range(0, len(ids), 100)]
chunked_tweets = (self.lookup_status(id=chunk) for chunk in id_chunks)
return itertools.chain.from_iterable(chunked_tweets)
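# Illustrative usage sketch (the IDs file is hypothetical):
#     oauth = credsfromfile()
#     client = Query(**oauth)
#     with open("tweet_ids.txt") as ids_f:
#         for tweet in client.expand_tweetids(ids_f):
#             print(tweet["text"])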
def _search_tweets(self, keywords, limit=100, lang="en"):
"""
Assumes that a handler has been registered. Fetches Tweets from the
`search_tweets` generator and passes them to the handler.
:param str keywords: A list of query terms to search for, written as\
a comma-separated string.
:param int limit: Number of Tweets to process
:param str lang: language
"""
while True:
tweets = self.search_tweets(
keywords=keywords, limit=limit, lang=lang, max_id=self.handler.max_id
)
for tweet in tweets:
self.handler.handle(tweet)
if not (self.handler.do_continue() and self.handler.repeat):
break
self.handler.on_finish()
def search_tweets(
self,
keywords,
limit=100,
lang="en",
max_id=None,
retries_after_twython_exception=0,
):
"""
Call the REST API ``'search/tweets'`` endpoint with some plausible
defaults. See `the Twitter search documentation
<https://dev.twitter.com/rest/public/search>`_ for more information
about admissible search parameters.
:param str keywords: A list of query terms to search for, written as\
a comma-separated string
:param int limit: Number of Tweets to process
:param str lang: language
:param int max_id: id of the last tweet fetched
:param int retries_after_twython_exception: number of retries when\
searching Tweets before raising an exception
:rtype: python generator
"""
if not self.handler:
# if no handler is provided, `BasicTweetHandler` provides minimum
# functionality for limiting the number of Tweets retrieved
self.handler = BasicTweetHandler(limit=limit)
count_from_query = 0
if max_id:
self.handler.max_id = max_id
else:
results = self.search(
q=keywords, count=min(100, limit), lang=lang, result_type="recent"
)
count = len(results["statuses"])
if count == 0:
print("No Tweets available through REST API for those keywords")
return
count_from_query = count
self.handler.max_id = results["statuses"][count - 1]["id"] - 1
for result in results["statuses"]:
yield result
self.handler.counter += 1
if not self.handler.do_continue():
return
# Pagination loop: keep fetching Tweets until the desired count is
# reached while dealing with Twitter rate limits.
retries = 0
while count_from_query < limit:
try:
mcount = min(100, limit - count_from_query)
results = self.search(
q=keywords,
count=mcount,
lang=lang,
max_id=self.handler.max_id,
result_type="recent",
)
except TwythonRateLimitError as e:
print(f"Waiting for 15 minutes -{e}")
time.sleep(15 * 60) # wait 15 minutes
continue
except TwythonError as e:
print(f"Fatal error in Twython request -{e}")
if retries_after_twython_exception == retries:
raise e
retries += 1
count = len(results["statuses"])
if count == 0:
print("No more Tweets available through the REST API")
return
count_from_query += count
# the max_id is also present in the Tweet metadata
# results['search_metadata']['next_results'], but as part of a
# query and difficult to fetch. This is doing the equivalent
# (last tweet id minus one)
self.handler.max_id = results["statuses"][count - 1]["id"] - 1
for result in results["statuses"]:
yield result
self.handler.counter += 1
if not self.handler.do_continue():
return
def user_info_from_id(self, userids):
"""
Convert a list of userIDs into a variety of information about the users.
See <https://dev.twitter.com/rest/reference/get/users/show>.
:param list userids: A list of integer strings corresponding to Twitter userIDs
:rtype: list(json)
"""
return [self.show_user(user_id=userid) for userid in userids]
def user_tweets(self, screen_name, limit, include_rts="false"):
"""
Return a collection of the most recent Tweets posted by the user
:param str screen_name: The user's screen name; the initial '@' symbol\
should be omitted
:param int limit: The number of Tweets to recover; 200 is the maximum allowed
:param str include_rts: Whether to include statuses which have been\
retweeted by the user; possible values are 'true' and 'false'
"""
data = self.get_user_timeline(
screen_name=screen_name, count=limit, include_rts=include_rts
)
for item in data:
self.handler.handle(item)
class Twitter:
"""
Wrapper class with restricted functionality and fewer options.
"""
def __init__(self):
self._oauth = credsfromfile()
self.streamer = Streamer(**self._oauth)
self.query = Query(**self._oauth)
def tweets(
self,
keywords="",
follow="",
to_screen=True,
stream=True,
limit=100,
date_limit=None,
lang="en",
repeat=False,
gzip_compress=False,
):
"""
Process some Tweets in a simple manner.
:param str keywords: Keywords to use for searching or filtering
:param list follow: UserIDs to use for filtering Tweets from the public stream
:param bool to_screen: If `True`, display the tweet texts on the screen,\
otherwise print to a file
:param bool stream: If `True`, use the live public stream,\
otherwise search past public Tweets
:param int limit: The number of data items to process in the current\
round of processing.
:param tuple date_limit: The date at which to stop collecting\
new data. This should be entered as a tuple which can serve as the\
argument to `datetime.datetime`.\
E.g. `date_limit=(2015, 4, 1, 12, 40)` for 12:40 pm on April 1 2015.
Note that, in the case of streaming, this is the maximum date, i.e.\
a date in the future; if not, it is the minimum date, i.e. a date\
in the past
:param str lang: language
:param bool repeat: A flag to determine whether multiple files should\
be written. If `True`, the length of each file will be set by the\
value of `limit`. Use only if `to_screen` is `False`. See also
:py:func:`handle`.
:param gzip_compress: if `True`, output files are compressed with gzip.
"""
if stream:
upper_date_limit = date_limit
lower_date_limit = None
else:
upper_date_limit = None
lower_date_limit = date_limit
if to_screen:
handler = TweetViewer(
limit=limit,
upper_date_limit=upper_date_limit,
lower_date_limit=lower_date_limit,
)
else:
handler = TweetWriter(
limit=limit,
upper_date_limit=upper_date_limit,
lower_date_limit=lower_date_limit,
repeat=repeat,
gzip_compress=gzip_compress,
)
if stream:
self.streamer.register(handler)
if keywords == "" and follow == "":
self.streamer.sample()
else:
self.streamer.filter(track=keywords, follow=follow, lang=lang)
else:
self.query.register(handler)
if keywords == "":
raise ValueError("Please supply at least one keyword to search for.")
else:
self.query._search_tweets(keywords, limit=limit, lang=lang)
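# Illustrative usage sketch (taken from the demo module): follow two accounts
# on the public stream, stopping after five Tweets:
#
#     tw = Twitter()
#     tw.tweets(follow=["759251", "6017542"], stream=True, limit=5)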
class TweetViewer(TweetHandlerI):
"""
Handle data by sending it to the terminal.
"""
def handle(self, data):
"""
Direct data to `sys.stdout`
:return: return ``False`` if processing should cease, otherwise return ``True``.
:rtype: bool
:param data: Tweet object returned by Twitter API
"""
text = data["text"]
print(text)
self.check_date_limit(data)
if self.do_stop:
return
def on_finish(self):
print(f"Written {self.counter} Tweets")
class TweetWriter(TweetHandlerI):
"""
Handle data by writing it to a file.
"""
def __init__(
self,
limit=2000,
upper_date_limit=None,
lower_date_limit=None,
fprefix="tweets",
subdir="twitter-files",
repeat=False,
gzip_compress=False,
):
"""
The difference between the upper and lower date limits depends on
whether Tweets are coming in an ascending date order (i.e. when
streaming) or descending date order (i.e. when searching past Tweets).
:param int limit: number of data items to process in the current\
round of processing.
:param tuple upper_date_limit: The date at which to stop collecting new\
data. This should be entered as a tuple which can serve as the\
argument to `datetime.datetime`. E.g. `upper_date_limit=(2015, 4, 1, 12,\
40)` for 12:40 pm on April 1 2015.
:param tuple lower_date_limit: The date at which to stop collecting new\
data. See `upper_date_limit` for formatting.
:param str fprefix: The prefix to use in creating file names for Tweet\
collections.
:param str subdir: The name of the directory where Tweet collection\
files should be stored.
:param bool repeat: flag to determine whether multiple files should be\
written. If `True`, the length of each file will be set by the value\
of `limit`. See also :py:func:`handle`.
:param gzip_compress: if `True`, output files are compressed with gzip.
"""
self.fprefix = fprefix
self.subdir = guess_path(subdir)
self.gzip_compress = gzip_compress
self.fname = self.timestamped_file()
self.repeat = repeat
self.output = None
TweetHandlerI.__init__(self, limit, upper_date_limit, lower_date_limit)
def timestamped_file(self):
"""
:return: timestamped file name
:rtype: str
"""
subdir = self.subdir
fprefix = self.fprefix
if subdir:
if not os.path.exists(subdir):
os.mkdir(subdir)
fname = os.path.join(subdir, fprefix)
fmt = "%Y%m%d-%H%M%S"
timestamp = datetime.datetime.now().strftime(fmt)
if self.gzip_compress:
suffix = ".gz"
else:
suffix = ""
outfile = f"{fname}.{timestamp}.json{suffix}"
return outfile
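# e.g. "<subdir>/tweets.20150430-223406.json", with a ".gz" suffix added
# when gzip_compress is True.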
def handle(self, data):
"""
Write Twitter data as line-delimited JSON into one or more files.
:return: return `False` if processing should cease, otherwise return `True`.
:param data: tweet object returned by Twitter API
"""
if self.startingup:
if self.gzip_compress:
self.output = gzip.open(self.fname, "w")
else:
self.output = open(self.fname, "w")
print(f"Writing to {self.fname}")
json_data = json.dumps(data)
if self.gzip_compress:
self.output.write((json_data + "\n").encode("utf-8"))
else:
self.output.write(json_data + "\n")
self.check_date_limit(data)
if self.do_stop:
return
self.startingup = False
def on_finish(self):
print(f"Written {self.counter} Tweets")
if self.output:
self.output.close()
def do_continue(self):
if not self.repeat:
return TweetHandlerI.do_continue(self)
if self.do_stop:
# stop for a functional cause (e.g. date limit)
return False
if self.counter == self.limit:
# repeat is True, thus close output file and
# create a new one
self._restart_file()
return True
def _restart_file(self):
self.on_finish()
self.fname = self.timestamped_file()
self.startingup = True
self.counter = 0


@@ -0,0 +1,147 @@
# Natural Language Toolkit: Twitter client
#
# Copyright (C) 2001-2025 NLTK Project
# Author: Ewan Klein <ewan@inf.ed.ac.uk>
# Lorenzo Rubio <lrnzcig@gmail.com>
# URL: <https://www.nltk.org/>
# For license information, see LICENSE.TXT
"""
Authentication utilities to accompany `twitterclient`.
"""
import os
import pprint
from twython import Twython
def credsfromfile(creds_file=None, subdir=None, verbose=False):
"""
Convenience function for authentication
"""
return Authenticate().load_creds(
creds_file=creds_file, subdir=subdir, verbose=verbose
)
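# Illustrative usage sketch: assuming the TWITTER environment variable points
# at a directory containing a 'credentials.txt' file in the format described
# in `Authenticate.load_creds` below:
#
#     oauth = credsfromfile()
#     client = Query(**oauth)   # or Streamer(**oauth)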
class Authenticate:
"""
Methods for authenticating with Twitter.
"""
def __init__(self):
self.creds_file = "credentials.txt"
self.creds_fullpath = None
self.oauth = {}
try:
self.twitter_dir = os.environ["TWITTER"]
self.creds_subdir = self.twitter_dir
except KeyError:
self.twitter_dir = None
self.creds_subdir = None
def load_creds(self, creds_file=None, subdir=None, verbose=False):
"""
Read OAuth credentials from a text file.
File format for OAuth 1::
app_key=YOUR_APP_KEY
app_secret=YOUR_APP_SECRET
oauth_token=OAUTH_TOKEN
oauth_token_secret=OAUTH_TOKEN_SECRET
File format for OAuth 2::
app_key=YOUR_APP_KEY
app_secret=YOUR_APP_SECRET
access_token=ACCESS_TOKEN
:param str creds_file: File containing credentials. ``None`` (default) reads
data from `TWITTER/'credentials.txt'`
"""
if creds_file is not None:
self.creds_file = creds_file
if subdir is None:
if self.creds_subdir is None:
msg = (
"Supply a value to the 'subdir' parameter or"
+ " set the TWITTER environment variable."
)
raise ValueError(msg)
else:
self.creds_subdir = subdir
self.creds_fullpath = os.path.normpath(
os.path.join(self.creds_subdir, self.creds_file)
)
if not os.path.isfile(self.creds_fullpath):
raise OSError(f"Cannot find file {self.creds_fullpath}")
with open(self.creds_fullpath) as infile:
if verbose:
print(f"Reading credentials file {self.creds_fullpath}")
for line in infile:
if "=" in line:
name, value = line.split("=", 1)
self.oauth[name.strip()] = value.strip()
self._validate_creds_file(verbose=verbose)
return self.oauth
def _validate_creds_file(self, verbose=False):
"""Check validity of a credentials file."""
oauth1 = False
oauth1_keys = ["app_key", "app_secret", "oauth_token", "oauth_token_secret"]
oauth2 = False
oauth2_keys = ["app_key", "app_secret", "access_token"]
if all(k in self.oauth for k in oauth1_keys):
oauth1 = True
elif all(k in self.oauth for k in oauth2_keys):
oauth2 = True
if not (oauth1 or oauth2):
msg = f"Missing or incorrect entries in {self.creds_file}\n"
msg += pprint.pformat(self.oauth)
raise ValueError(msg)
elif verbose:
print(f'Credentials file "{self.creds_file}" looks good')
def add_access_token(creds_file=None):
"""
For OAuth 2, retrieve an access token for an app and append it to a
credentials file.
"""
if creds_file is None:
path = os.path.dirname(__file__)
creds_file = os.path.join(path, "credentials2.txt")
oauth2 = credsfromfile(creds_file=creds_file)
app_key = oauth2["app_key"]
app_secret = oauth2["app_secret"]
twitter = Twython(app_key, app_secret, oauth_version=2)
access_token = twitter.obtain_access_token()
tok = f"access_token={access_token}"
with open(creds_file, "a") as outfile:
print(tok, file=outfile)
def guess_path(pth):
"""
If the path is not absolute, guess that it is a subdirectory of the
user's home directory.
:param str pth: The pathname of the directory where files of tweets should be written
"""
if os.path.isabs(pth):
return pth
else:
return os.path.expanduser(os.path.join("~", pth))
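# For example, guess_path("twitter-files") expands to something like
# "/home/<user>/twitter-files" on Linux, while an absolute path is returned
# unchanged.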