Skip to content

Commit

Permalink
Merge pull request #1 from sfu-discourse-lab/visualizations
Browse files Browse the repository at this point in the history
New code for extraction and stats on all corpora
  • Loading branch information
Vasundhara Gautam committed Mar 11, 2019
2 parents 1be736a + 5bcbef1 commit fb0de05
Show file tree
Hide file tree
Showing 41 changed files with 104,921 additions and 0 deletions.
3,922 changes: 3,922 additions & 0 deletions 500_frequencies/Relative frequency.ipynb

Large diffs are not rendered by default.

130 changes: 130 additions & 0 deletions NYT/.ipynb_checkpoints/Extracting NYT movie reviews 1-checkpoint.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import requests\n",
"import pandas as pd\n",
"import numpy as np\n",
"import sys, os, re, time\n",
"import json, csv\n",
"from bs4 import BeautifulSoup\n",
"from collections import defaultdict"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"API_SEARCH_URL = 'http:https://api.nytimes.com/svc/movies/v2/reviews/search.json?'\n",
"API_KEY = 'REPLACE_KEY_HERE'\n",
"OFFSET = 27248"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"def getReviews(params={}):\n",
" query_params = []\n",
" \n",
" for k in params:\n",
" query_params.append(k + '=' + str(params[k]))\n",
" \n",
" if 'api_key' not in params:\n",
" query_params.append('api-key' + '=' + API_KEY)\n",
" \n",
" result = requests.get(API_SEARCH_URL + '&'.join(query_params)).json()\n",
" \n",
" time.sleep(6)\n",
" \n",
" return result"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"with open('nyt_reviews.csv', 'w+') as fo:\n",
" fieldnames = ['display_title', 'headline', 'byline', 'link', 'mpaa_rating',\n",
" 'opening_date', 'publication_date', 'date_updated',\n",
" 'critics_pick', 'summary_short']\n",
" \n",
" writer = csv.writer(fo)\n",
" writer.writerow(fieldnames)\n",
"\n",
" params = defaultdict(str)\n",
"\n",
" offset = OFFSET\n",
" while True:\n",
" params['offset'] = offset\n",
"\n",
"# print(offset)\n",
" \n",
" response = getReviews(params)\n",
" \n",
" if ('num_results' not in response):\n",
" if ('hasMore' not in response):\n",
" print('ERROR:')\n",
" print('offset=' + str(offset))\n",
" print(response)\n",
" break\n",
" else:\n",
" hasMore = response['hasMore']\n",
" num_results = len(response['results'])\n",
" else:\n",
" num_results = response['num_results']\n",
" if ('hasMore' not in response):\n",
" hasMore = True\n",
" else:\n",
" hasMore = response['hasMore']\n",
"\n",
" if (num_results == 0) or (hasMore is not True):\n",
" break\n",
" \n",
"# print(num_results)\n",
"\n",
" for result in response['results']:\n",
" line = [result['display_title'], result['headline'], result['byline'],\n",
" result['link']['url'], result['mpaa_rating'], result['opening_date'],\n",
" result['publication_date'], result['date_updated'],\n",
" result['critics_pick'], result['summary_short']]\n",
" writer.writerow(line)\n",
"\n",
"# print()\n",
"\n",
" offset += num_results"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.6"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Loading

0 comments on commit fb0de05

Please sign in to comment.