{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "bb08745f",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Table 'PopularWords' created\n",
      "Analysis has been completed and the analyzed data has been saved in the database.\n"
     ]
    }
   ],
   "source": [
    "import csv\n",
    "from collections import defaultdict\n",
    "import re\n",
    "from database import (drop_table,create_table,insert_word)\n",
    "\n",
    "table_name = \"PopularWords\"\n",
    "\n",
    "# Tally of how often each word occurs across all review titles.\n",
    "# Counting is case-sensitive: \"Good\" and \"good\" are separate entries.\n",
    "words_with_count = defaultdict(int)\n",
    "\n",
    "# Count first and only touch the database afterwards, so an unreadable or\n",
    "# malformed CSV does not leave the previous table dropped.\n",
    "# newline='' is what the csv module asks for on files handed to a reader;\n",
    "# it keeps line breaks embedded in quoted fields intact.\n",
    "# NOTE(review): the file is decoded with the platform default encoding -\n",
    "# confirm that matches how data/reviews.csv was written.\n",
    "with open('data/reviews.csv', 'r', newline='') as file:\n",
    "    reader = csv.DictReader(file)\n",
    "    for row in reader:\n",
    "        # DictReader fills fields missing from a short row with None;\n",
    "        # treat that as an empty title instead of crashing in re.sub.\n",
    "        # A missing 'title' column still raises KeyError.\n",
    "        title = row['title'] or ''\n",
    "        # Keep only ASCII letters and whitespace, then split on whitespace.\n",
    "        title = re.sub(r'[^a-zA-Z\\s]', '', title)\n",
    "        for word in title.split():\n",
    "            words_with_count[word] += 1\n",
    "\n",
    "# Rebuild the table from scratch and store one row per distinct word.\n",
    "drop_table(table_name)\n",
    "create_table(table_name, \"word\",\"count\")\n",
    "for word, count in words_with_count.items():\n",
    "    insert_word(table_name, word, count)\n",
    "print(\"Analysis has been completed and the analyzed data has been saved in the database.\")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "81884aa1",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}