
Commit

Web Scraper
acgrdumlu committed Oct 5, 2017
1 parent 381a97f commit 7ffb58e
Showing 9 changed files with 1,486 additions and 1 deletion.
1 change: 1 addition & 0 deletions 11-flashcards-app
Submodule 11-flashcards-app added at e437fe
1 change: 0 additions & 1 deletion 17-github-web-scrape
Submodule 17-github-web-scrape deleted from c89029
59 changes: 59 additions & 0 deletions 18-github-web-scrape/.gitignore
@@ -0,0 +1,59 @@
# Logs
logs
*.log
npm-debug.log*
yarn-debug.log*
yarn-error.log*

# Runtime data
pids
*.pid
*.seed
*.pid.lock

# Directory for instrumented libs generated by jscoverage/JSCover
lib-cov

# Coverage directory used by tools like istanbul
coverage

# nyc test coverage
.nyc_output

# Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files)
.grunt

# Bower dependency directory (https://bower.io/)
bower_components

# node-waf configuration
.lock-wscript

# Compiled binary addons (https://nodejs.org/api/addons.html)
build/Release

# Dependency directories
node_modules/
jspm_packages/

# Typescript v1 declaration files
typings/

# Optional npm cache directory
.npm

# Optional eslint cache
.eslintcache

# Optional REPL history
.node_repl_history

# Output of 'npm pack'
*.tgz

# Yarn Integrity file
.yarn-integrity

# dotenv environment variables file
.env

3 changes: 3 additions & 0 deletions 18-github-web-scrape/README.md
@@ -0,0 +1,3 @@
# Github Web Scrape

Simple web scraper that collects a GitHub user's daily contribution counts and dates from their profile page and writes them to a JSON file.
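For reference, each entry that app.js writes to output.json pairs a day's contribution count with its date. A hypothetical excerpt (illustrative values, not real scraped data) would look like:

[
    {
        "count": 4,
        "date": "2017-10-05"
    }
]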
49 changes: 49 additions & 0 deletions 18-github-web-scrape/app.js
@@ -0,0 +1,49 @@
const express = require('express');
const fs = require('fs');
const cheerio = require('cheerio');
const request = require('request');
const bodyParser = require('body-parser');

const app = express();

app.use(bodyParser.urlencoded({ extended: false }));

app.set('view engine', 'pug');

// Render the search form.
app.get('/', (req, res) => {
  res.render('index');
});

// Scrape the contribution calendar of the GitHub user passed as ?username=<name>.
app.get('/scrape', (req, res) => {
  const name = req.query.username;
  console.log(name);

  const url = `https://www.github.com/${name}`;

  request(url, (error, response, html) => {
    if (error) {
      return res.status(500).send('Could not reach GitHub.');
    }

    const $ = cheerio.load(html);
    const parsedResults = [];

    // Each <rect class="day"> in the contribution graph carries a count and a date.
    $('rect.day').each(function () {
      parsedResults.push({
        count: parseInt($(this).attr('data-count'), 10),
        date: $(this).attr('data-date')
      });
    });

    fs.writeFile('output.json', JSON.stringify(parsedResults, null, 4), (err) => {
      if (err) {
        return console.error(err);
      }
      console.log('File successfully written! - Check your project directory for the output.json file');
    });

    res.send('Check your console!');
  });
});

app.listen(3000, () => {
  console.log('The application is running on localhost:3000!');
});
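One way to exercise the /scrape route once the server is running is a plain Node HTTP request. This is only a minimal sketch; "someuser" is a placeholder username, not one taken from the repository:

// Minimal sketch: hit the running scraper and print its response.
// "someuser" below is a hypothetical placeholder for a real GitHub username.
const http = require('http');

http.get('http://localhost:3000/scrape?username=someuser', (res) => {
  res.on('data', (chunk) => process.stdout.write(chunk)); // prints "Check your console!"
  res.on('end', () => console.log('\nDone - see output.json in the project directory.'));
});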