Skip to content

Commit

Permalink
Merge pull request pulumi#68 from pulumi/lindydonna/twitter-athena
Browse files Browse the repository at this point in the history
Twitter Athena example
  • Loading branch information
joeduffy committed Jun 15, 2018
2 parents 70d6131 + c4363b8 commit faa9422
Show file tree
Hide file tree
Showing 6 changed files with 182 additions and 0 deletions.
5 changes: 5 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -80,3 +80,8 @@ Guestbook](https://kubernetes.io/docs/tutorials/stateless-application/guestbook/
#### [AWS Voting App with Containers](cloud-ts-voting-app/)

A simple voting app that uses Redis for a data store and a Python Flask app for the frontend, demonstrating the high-level framework `@pulumi/cloud`.

#### [AWS Athena Twitter Analyzer](cloud-js-twitter-athena/)

A sample project that periodically queries Twitter for a search term, stores the results in S3, and configures an Athena query for data analysis.

3 changes: 3 additions & 0 deletions cloud-js-twitter-athena/Pulumi.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
name: aws-serverless-js-twitter
description: Analyze tweets in AWS Athena
runtime: nodejs
65 changes: 65 additions & 0 deletions cloud-js-twitter-athena/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
# Twitter Search in Athena

A sample project that queries Twitter every 2 minutes and stores the results in S3. The project also sets up an Athena database and saved queries for creating and querying a table. This project demonstrates using `cloud.timer.interval` to run a Lambda on an interval.

## Setup

Register a new [Twitter app](https://apps.twitter.com/).

## Deploy and run the program

1. Create a new stack:

```
pulumi stack init twitter-athena
```

1. In Twitter, get the keys for your application. Set configuration values for your Twitter consumer key/secret and access token key/secret. Use the `--secret` flag to securely encrypt secret values.

```
pulumi config set twitterConsumerKey <Value for Consumer Key (API Key)>
pulumi config set --secret twitterConsumerSecret <Value for Consumer Secret (API Secret)>
pulumi config set twitterAccessTokenKey <Value for Access Token>
pulumi config set --secret twitterAccessTokenSecret <Value for Access Token Secret>
```

1. Set a search term to query for:

```
pulumi config set twitterQuery "Amazon Web Services"
```

1. Set the AWS region:

```bash
pulumi config set aws:region us-west-2
```

1. Restore NPM modules via `npm install`.

1. Preview and run the deployment via `pulumi update`. A total of 16 resources are created.

1. Run `pulumi stack output` to view output properties (or view the stack on pulumi.com).

```
$ pulumi stack output
Please choose a stack: aws-serverless-js-twitter-dev
Current stack outputs (4):
OUTPUT VALUE
athenaDatabase tweets_database
bucketName tweet-bucket-de18828
createTableQueryUri https://us-west-2.console.aws.amazon.com/athena/home?force#query/saved/e394800e-a35e-44b3-b8ca-8b47b0f74469
topUsersQueryUri https://us-west-2.console.aws.amazon.com/athena/home?force#query/saved/51fa5744-bab6-4e5f-8cd6-9447b6619f06
```

1. Navigate to the URL for `createTableQueryUri` and run the query in the Athena console. This will create a table called `tweets`.

1. Navigate to the URL for `topUsersQueryUri` and run the query in Athena. You'll see tweets for your search term, by users with more than 1000 followers.

![Athena console](athena-screenshot.png)

## Clean up

To clean up resources, run `pulumi destroy` and answer the confirmation question at the prompt.


Binary file added cloud-js-twitter-athena/athena-screenshot.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
99 changes: 99 additions & 0 deletions cloud-js-twitter-athena/index.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
const pulumi = require("@pulumi/pulumi");
const aws = require("@pulumi/aws");
const cloud = require("@pulumi/cloud-aws");

// Bucket that receives the collected tweets.
const bucket = new cloud.Bucket("tweet-bucket");

// All settings are read from the "aws-serverless-js-twitter" configuration
// namespace. `require` is used for each one, so every value must be set.
const config = new pulumi.Config("aws-serverless-js-twitter");

// Twitter API credentials.
const consumerKey = config.require("twitterConsumerKey");
const consumerSecret = config.require("twitterConsumerSecret");
const accessTokenKey = config.require("twitterAccessTokenKey");
const accessTokenSecret = config.require("twitterAccessTokenSecret");

// Search term sent to Twitter, and the key prefix under which results are stored.
const twitterQuery = config.require("twitterQuery");
const outputFolder = "tweets";

// Every two minutes, search Twitter for `twitterQuery` and write the results
// to the bucket as newline-delimited JSON (one tweet per line) under
// `${outputFolder}/<timestamp in ms>`.
//
// The handler awaits both the search and the bucket write so that it does not
// complete before the work is done, and so that failures surface as a
// rejected handler instead of an unhandled exception inside a callback.
cloud.timer.interval("twitter-search-timer", { minutes: 2 }, async () => {
    // Required inside the handler so the module is resolved when the handler runs.
    const TwitterClient = require("twitter");

    const client = new TwitterClient({
        consumer_key: consumerKey,
        consumer_secret: consumerSecret,
        access_token_key: accessTokenKey,
        access_token_secret: accessTokenSecret,
    });

    let tweets;
    try {
        // With the callback omitted, the client returns a Promise.
        tweets = await client.get("search/tweets", { q: twitterQuery, count: 100 });
    } catch (err) {
        console.log(`Twitter search failed for query ${twitterQuery}:`, err);
        throw err;
    }

    // Guard against a response without a `statuses` array.
    const statuses = (tweets && tweets.statuses) || [];

    const results = statuses.map((s) => {
        const user = s.user.screen_name;

        return JSON.stringify({
            created_at: s.created_at,
            id: s.id_str,
            text: s.text,
            user: user,
            hashtags: s.entities.hashtags,
            followers: s.user.followers_count,
            isVerified: s.user.verified,
            isRetweet: s.retweeted_status != null,
            url: `https://twitter.com/${user}/status/${s.id_str}`,
        });
    });

    console.log(`Got ${results.length} tweets from Twitter for query ${twitterQuery}`);

    // Nothing to store; avoid writing an empty object to the bucket.
    if (results.length === 0) {
        return;
    }

    const filename = `${outputFolder}/${Date.now()}`;
    const contents = Buffer.from(results.join("\n"), "utf8");

    await bucket.put(filename, contents);
});

// Athena setup: a database named "tweets_database" that is given the tweet
// bucket's id as its `bucket`, with `forceDestroy` enabled.
const athena = new aws.athena.Database("tweets_database", {
    name: "tweets_database",
    bucket: bucket.bucket.id,
    forceDestroy: true,
});

// Sadly, there isn't support for Athena tables in Terraform.
// See https://github.com/terraform-providers/terraform-provider-aws/pull/1893#issuecomment-351300973
// So, we'll instead create a query for the table definition.

/**
 * Builds the DDL statement that creates the external `tweets` table over the
 * JSON files stored under `outputFolder` in the given bucket.
 *
 * @param {string} bucket - Name of the S3 bucket holding the tweet files.
 * @returns {string} A `CREATE EXTERNAL TABLE` statement for Athena.
 */
function createTableQuery(bucket) {
    // LOCATION must be an S3 URI of the form s3://<bucket>/<prefix>/.
    return `CREATE EXTERNAL TABLE IF NOT EXISTS tweets (
    id string,
    text string,
    user string,
    isVerified boolean,
    url string,
    followers int,
    hashtags string,
    isRetweet boolean
)
ROW FORMAT SERDE 'org.openx.data.jsonserde.JsonSerDe'
LOCATION 's3://${bucket}/${outputFolder}/';`;
}

// Query listing distinct non-retweet tweets from users with more than 1000
// followers, ordered by follower count (highest first).
const topUsersQuery = [
    "select distinct user, followers, text, url",
    "from tweets",
    "where isRetweet = false and followers > 1000",
    "order by followers desc",
].join("\n");

const bucketName = bucket.bucket.id;

// Saved query holding the table definition; its text is produced from the
// bucket name via `apply`.
const createTableAthenaQuery = new aws.athena.NamedQuery("createTable", {
    database: athena.id,
    query: bucketName.apply((name) => createTableQuery(name)),
});

// Saved query for the top-users report.
const topUsersAthenaQuery = new aws.athena.NamedQuery("topUsers", {
    database: athena.id,
    query: topUsersQuery,
});

/**
 * Builds the Athena console URL for a saved query, using the `region` value
 * from the "aws" configuration namespace.
 *
 * @param {string} queryId - Id of the saved query.
 * @returns {string} URL of the saved query in the Athena console.
 */
function getQueryUri(queryId) {
    const awsRegion = new pulumi.Config("aws").require("region");
    const consoleHost = `${awsRegion}.console.aws.amazon.com`;
    return `https://${consoleHost}/athena/home?force#query/saved/${queryId}`;
}

// Stack outputs: the bucket name, the Athena database id, and console URLs
// for the two saved queries.
exports.bucketName = bucketName;
exports.athenaDatabase = athena.id;
exports.topUsersQueryUri = topUsersAthenaQuery.id.apply((id) => getQueryUri(id));
exports.createTableQueryUri = createTableAthenaQuery.id.apply((id) => getQueryUri(id));
10 changes: 10 additions & 0 deletions cloud-js-twitter-athena/package.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
{
"name": "aws-serverless-js-twitter",
"main": "index.js",
"dependencies": {
"@pulumi/cloud": "^0.13.1",
"@pulumi/cloud-aws": "^0.13.1",
"@pulumi/pulumi": "latest",
"twitter": "^1.7.1"
}
}

0 comments on commit faa9422

Please sign in to comment.