Skip to content

Commit

Permalink
update node class
Browse files Browse the repository at this point in the history
  • Loading branch information
kugouming committed Jan 23, 2016
1 parent f038d84 commit 7705804
Show file tree
Hide file tree
Showing 683 changed files with 52,626 additions and 0 deletions.
4 changes: 4 additions & 0 deletions nodejs/class-crawler/Readme.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# 利用Node 抓取慕课网数据

## 分析页面模块 cheerio
安装:`npm install cheerio`
88 changes: 88 additions & 0 deletions nodejs/class-crawler/crawler.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
var http = require('http')
var cheerio = require('cheerio')
// Course page to crawl. The address must carry exactly one scheme, and it has
// to be plain `http://` because the request is issued through the `http` module.
var url = 'http://www.imooc.com/learn/348'

/**
 * Extracts the chapter/video outline from the course page markup.
 *
 * Every element matching `.chapter` becomes one entry; every `<li>` inside it
 * that has a direct `.studyvideo` child carrying an `href` becomes one video.
 *
 * Result shape:
 *   [{
 *     chapterTitle: '',
 *     chapterData: [{ id: '', title: '', url: '' }]
 *   }]
 *
 * @param {string} html - markup of the course page, handed to cheerio.load
 * @returns {Array} one object per chapter, in document order
 */
function filterChapters(html) {
  var $ = cheerio.load(html)
  var chapterList = []

  $('.chapter').each(function () {
    var chapter = $(this)
    var videoData = []

    chapter.find('li').each(function () {
      var video = $(this)
      var videoUrl = video.children('.studyvideo').attr('href')

      // An <li> without a video link is not a video entry; skip it instead
      // of crashing on `undefined.match(...)`.
      if (!videoUrl) {
        return
      }

      // The id is the first run of digits in the link; `match` yields null
      // when the link contains none, so guard before indexing.
      var idMatch = videoUrl.match(/\d+/)

      videoData.push({
        id: idMatch ? idMatch[0] : null,
        title: video.text(),
        url: videoUrl
      })
    })

    chapterList.push({
      chapterTitle: chapter.find('strong').text(),
      chapterData: videoData
    })
  })

  return chapterList
}


/**
 * Prints the course outline to stdout.
 *
 * For each chapter the title is logged first, followed by two lines per
 * video: its zero-based position, id and whitespace-trimmed title, then its
 * absolute URL (the site origin prepended to the stored relative link).
 *
 * @param {Array} courseData - chapter objects exposing `chapterTitle` and
 *   `chapterData`; each `chapterData` item exposes `id`, `title` and `url`
 * @returns {undefined}
 */
function printCourseInfo(courseData) {
  courseData.forEach(function (chapter) {
    console.log(chapter.chapterTitle)

    chapter.chapterData.forEach(function (video, index) {
      console.log(' ' + index + ': [' + video.id + '] ' + video.title.trim())
      // Single, well-formed scheme so the printed link is directly usable.
      console.log(' http://www.imooc.com' + video.url)
    })
  })
}

// Download the course page, then parse it and print the outline.
http.get(url, function (res) {
  // Anything other than 200 (redirect, error page, ...) is not the course
  // markup; report it and drain the response so the socket is released.
  if (res.statusCode !== 200) {
    console.log('获取数据失败', res.statusCode)
    res.resume()
    return
  }

  // Decode as UTF-8 at the stream level so multi-byte characters that are
  // split across two chunks are not corrupted by per-chunk conversion.
  res.setEncoding('utf8')

  var html = ''
  res.on('data', function (chunk) {
    html += chunk
  })
  res.on('end', function () {
    var courseData = filterChapters(html)
    printCourseInfo(courseData)
  })
}).on('error', function (err) {
  // Keep the original message but include the cause instead of dropping it.
  console.log('获取数据失败', err.message)
})
16 changes: 16 additions & 0 deletions nodejs/class-crawler/index.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
var http = require('http')

// Course page to fetch. The address must carry exactly one scheme, and it has
// to be plain `http://` because the request is issued through the `http` module.
var url = 'http://www.imooc.com/learn/348'

// Download the course page and dump its raw markup to stdout.
http.get(url, function (res) {
  // Anything other than 200 (redirect, error page, ...) is not the course
  // markup; report it and drain the response so the socket is released.
  if (res.statusCode !== 200) {
    console.log('获取课程数据出错!', res.statusCode)
    res.resume()
    return
  }

  // Decode as UTF-8 at the stream level so multi-byte characters that are
  // split across two chunks are not corrupted by per-chunk conversion.
  res.setEncoding('utf8')

  var html = ''
  res.on('data', function (chunk) {
    html += chunk
  })

  res.on('end', function () {
    console.log(html)
  })
}).on('error', function (err) {
  // Keep the original message but include the cause instead of dropping it.
  console.log('获取课程数据出错!', err.message)
})
10 changes: 10 additions & 0 deletions nodejs/class-crawler/node_modules/boolbase/README.md

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 8 additions & 0 deletions nodejs/class-crawler/node_modules/boolbase/index.js

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

78 changes: 78 additions & 0 deletions nodejs/class-crawler/node_modules/boolbase/package.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 10 additions & 0 deletions nodejs/class-crawler/node_modules/cheerio/.jshintrc

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

9 changes: 9 additions & 0 deletions nodejs/class-crawler/node_modules/cheerio/.npmignore

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

9 changes: 9 additions & 0 deletions nodejs/class-crawler/node_modules/cheerio/.travis.yml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

47 changes: 47 additions & 0 deletions nodejs/class-crawler/node_modules/cheerio/CONTRIBUTING.md

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 7705804

Please sign in to comment.