{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 4.3 Pre-trained Transformer Models\n",
    "\n",
    "Four short example sentences are scored with two pre-trained Hugging Face sentiment-analysis pipelines so that the predicted labels can be compared side by side."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import transformers\n",
    "from transformers import pipeline"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Default sentiment model\n",
    "\n",
    "`distilbert-base-uncased-finetuned-sst-2-english` is the checkpoint that `pipeline(\"sentiment-analysis\")` falls back to when no model is given. It is named explicitly here so the results do not silently change if that library default is ever updated."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "sentiment_pipeline = pipeline(\n",
    "    \"sentiment-analysis\",\n",
    "    model=\"distilbert/distilbert-base-uncased-finetuned-sst-2-english\",\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The example sentences: one clearly positive, two with mixed sentiment, and one neutral."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "sentence_1 = \"i had a great time at the movie it was really funny\"\n",
    "sentence_2 = \"i had a great time at the movie but the parking was terrible\"\n",
    "sentence_3 = \"i had a great time at the movie but the parking wasn't great\"\n",
    "sentence_4 = \"i went to see a movie\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "i had a great time at the movie it was really funny\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "[{'label': 'POSITIVE', 'score': 0.9998176693916321}]"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "print(sentence_1)\n",
    "sentiment_pipeline(sentence_1)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The pipeline returns a list with one dict per input, so the label alone can be pulled out with a comprehension."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['POSITIVE']"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "first_result = sentiment_pipeline(sentence_1)\n",
    "[prediction[\"label\"] for prediction in first_result]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "i had a great time at the movie but the parking was terrible\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "[{'label': 'NEGATIVE', 'score': 0.997746467590332}]"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "print(sentence_2)\n",
    "sentiment_pipeline(sentence_2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "i had a great time at the movie but the parking wasn't great\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "[{'label': 'NEGATIVE', 'score': 0.9984902739524841}]"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "print(sentence_3)\n",
    "sentiment_pipeline(sentence_3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "i went to see a movie\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "[{'label': 'POSITIVE', 'score': 0.9802699685096741}]"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "print(sentence_4)\n",
    "sentiment_pipeline(sentence_4)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## A tweet-specific model\n",
    "\n",
    "The default model isn't giving us great results for our neutral sentence: it only knows the labels `POSITIVE` and `NEGATIVE`. However, there are many other models we can choose from, trained on different data with different parameters: https://huggingface.co/models?pipeline_tag=text-classification&sort=downloads&search=sentiment\n",
    "\n",
    "Let's try a specific model trained only on English-language tweets and see if that improves our results."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "emoji is not installed, thus not converting emoticons or emojis into text. Please install emoji: pip3 install emoji\n"
     ]
    }
   ],
   "source": [
    "specific_model = pipeline(\"sentiment-analysis\", model=\"finiteautomata/bertweet-base-sentiment-analysis\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The `emoji` warning above only concerns converting emoticons and emojis into text. None of the example sentences contain any, so it can be ignored here."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "i had a great time at the movie it was really funny\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "[{'label': 'POS', 'score': 0.9923344850540161}]"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "print(sentence_1)\n",
    "specific_model(sentence_1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "i had a great time at the movie but the parking was terrible\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "[{'label': 'NEG', 'score': 0.5355542302131653}]"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "print(sentence_2)\n",
    "specific_model(sentence_2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "i had a great time at the movie but the parking wasn't great\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "[{'label': 'POS', 'score': 0.6234408020973206}]"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "print(sentence_3)\n",
    "specific_model(sentence_3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "i went to see a movie\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "[{'label': 'NEU', 'score': 0.9007399678230286}]"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "print(sentence_4)\n",
    "specific_model(sentence_4)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Observations\n",
    "\n",
    "- The tweet-trained model labels the neutral sentence `NEU` (score about 0.90), where the default model called it `POSITIVE` (about 0.98).\n",
    "- On the two mixed sentences the tweet-trained model is much less certain (`NEG` at about 0.54 and `POS` at about 0.62), whereas the default model labelled both `NEGATIVE` with scores above 0.99.\n",
    "- Both models agree that the first sentence is positive, with scores above 0.99."
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}