-
Notifications
You must be signed in to change notification settings - Fork 0
/
tasks.js
149 lines (142 loc) · 5.2 KB
/
tasks.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
var fs = require('fs');
var sleep = require('system-sleep');
var async = require('async');
var range = require('range').range;
var util = require('util');
var MongoClient = require('mongodb').MongoClient;
var redis = require('redis');
var common_client = null;
var assert = require('assert');
var mkdirp = require('mkdirp');
var crypto = require('crypto');
var exec = require('child_process').exec;
var companyfetcher = require('./companyfetcher');
var proxyfetcher = require('./proxyfetcher');
var config = JSON.parse(fs.readFileSync('config.json', 'utf8'));
var yargs = require('yargs').argv;
// ---- Command-line options (parsed by yargs) ----
// -t : concurrency limit handed to async.mapLimit in runTasks (default 3).
var max_threads = yargs.t || 3;
// -s : multiplier applied to the random per-request delay (default 1).
//      Only a missing option falls back to the default, so an explicit
//      `-s 0` disables sleeping, which is what the `if (sleep_secs)` guards
//      in runTasks/runOldTasks check for.
var sleep_secs = (yargs.s === undefined || yargs.s === null) ? 1 : yargs.s;
// -c : resume from the most recently updated company stored in MongoDB.
var is_continue = yargs.c || false;
// -j : forwarded as the second argument of companyfetcher.
var with_job = yargs.j || false;
// -o : take company ids from the shared Redis list instead of a numeric range.
var oldTask = yargs.o || false;
// -r : company id range, see getCompanyIdRange. Kept as `var` because the
//      variable may be touched while getCompanyIdRange is still running.
var company_id_range = getCompanyIdRange(yargs.r);
// Identifier for this run: md5 of the start timestamp.
var taskId = crypto.createHash('md5').update(new Date().toISOString()).digest("hex");

// The Redis client is only needed (and only created) in "old task" mode.
if (oldTask) {
    common_client = redis.createClient(config.common_redis);
    common_client.on("error", function (err) {
        console.log("remote redis Error " + err);
    });
}

// ---- Dispatch: resume mode takes precedence over old-task mode ----
if (is_continue) {
    MongoClient.connect(config.mongo_url, function (err, db) {
        assert.equal(null, err);
        checkLastCompanyId(db, function (start) {
            runTasks(start);
        });
    });
} else if (oldTask) {
    // Pass `true` so the "is running" banner is logged once for the first call.
    runOldTasks(true);
} else {
    runTasks(company_id_range[0]);
}
/**
 * Finds where a resumed run should start.
 *
 * Queries the `company` collection for documents whose `companyId` lies in
 * the configured range, ordered by `update_time` descending, and reads only
 * the first one.
 *
 * @param {Object} db - Connected MongoDB database handle.
 * @param {function(number)} callback - Receives the `companyId` of the most
 *     recently updated document, or the lower bound of the range when no
 *     document matches.
 */
var checkLastCompanyId = function (db, callback) {
    var atLeastLowerBound = {"companyId": {$gte: company_id_range[0]}};
    var atMostUpperBound = {"companyId": {$lte: company_id_range[1]}};
    var newestFirst = {update_time: -1};

    db.collection('company')
        .find({$and: [atLeastLowerBound, atMostUpperBound]})
        .sort(newestFirst)
        .limit(1)
        .toArray(function (err, docs) {
            assert.equal(null, err);
            var resumeFrom = docs.length ? docs[0].companyId : company_id_range[0];
            callback(resumeFrom);
        });
};
/**
 * Processes company ids taken from the Redis list `stored_company_ids`.
 *
 * Ids are popped one at a time: the next id is only requested after the
 * current fetch has reported back (and the optional delay has elapsed).
 * Requesting the next id immediately would make the delay pointless and
 * would let the shutdown below run while fetches are still in flight.
 *
 * When the list yields no positive id (empty list, or a Redis error), the
 * function ensures /tmp/lagou/ exists, logs completion, kills every process
 * matching `lagou/tasks.js` and exits.
 *
 * @param {boolean} [start] - Truthy on the first call only; triggers the
 *     "is running" log line.
 */
function runOldTasks(start) {
    if (start) {
        util.log('old task: ' + taskId + ' is running');
    }
    common_client.lpop('stored_company_ids', function (err, companyId) {
        if (err) {
            // Report the failure; companyId is then not a positive id, so the
            // run is wound down below instead of looping on a broken connection.
            console.error("remote redis lpop Error " + err);
        }
        if (!err && companyId > 0) {
            companyfetcher(companyId, with_job, function (result) {
                util.log(result);
                if (sleep_secs) {
                    // Random delay below 2000 (presumably milliseconds — TODO confirm
                    // against system-sleep), scaled by sleep_secs.
                    var delay = Math.floor((Math.random() * 10000000) % 2000);
                    if (companyId % 100 === 0) {
                        delay = 15;
                    }
                    sleep(delay * sleep_secs);
                }
                // Continue with the next id only once this one is finished.
                runOldTasks(false);
            });
            return;
        }

        var dir = "/tmp/lagou/";
        mkdirp(dir, function (mkdirErr) {
            assert.equal(null, mkdirErr);
            util.log("old task: " + taskId + " results file was saved! all done!!!");
            exec('sudo pkill -f lagou/tasks.js', (error, stdout, stderr) => {
                if (error) {
                    console.error(`exec error: ${error}`);
                    return;
                }
                // stderr output mentioning proxychains is tolerated; anything
                // else is reported and the explicit exit is skipped.
                if (stderr && stderr.indexOf('proxychains') == -1) {
                    console.error(`exec stderror: ${stderr}`);
                    return;
                }
                console.info('kill all node process!!!');
                process.exit(0);
            });
        });
    });
}
/**
 * Fetches every company id produced by `range(start, company_id_range[1])`
 * with at most `max_threads` fetches in flight, then writes the collected
 * results to /tmp/lagou/results-<taskId> as JSON, kills every process
 * matching `lagou/tasks.js` and exits.
 *
 * The results are serialized with JSON.stringify because fs.writeFile only
 * accepts strings/buffers; handing it the raw array fails on current Node
 * versions.
 *
 * @param {number} start - First company id to fetch. Whether the upper bound
 *     is itself fetched depends on the `range` package — TODO confirm.
 */
function runTasks(start) {
    yargs.time = new Date();
    util.log('task: ' + taskId + ' is running');
    async.mapLimit(range(start, company_id_range[1]), max_threads, function (companyId, callback) {
        companyfetcher(companyId, with_job, function (result) {
            util.log(result);
            callback(null, result);
            if (sleep_secs) {
                // Random delay below 2000 (presumably milliseconds — TODO confirm
                // against system-sleep), scaled by the integer part of sleep_secs.
                var delay = Math.floor((Math.random() * 10000000) % 2000);
                if (companyId % 100 === 0) {
                    delay = 15;
                }
                sleep(delay * parseInt(sleep_secs, 10));
            }
        });
    }, function (mapErr, results) {
        if (mapErr) {
            // Do not lose the failure; whatever results exist are still saved.
            console.error(mapErr);
        }
        var dir = "/tmp/lagou/";
        mkdirp(dir, function (mkdirErr) {
            assert.equal(null, mkdirErr);
            var payload = JSON.stringify(results === undefined || results === null ? [] : results);
            fs.writeFile(dir + "results-" + taskId, payload, function (writeErr) {
                if (writeErr) {
                    return console.error(writeErr);
                }
                util.log("task: " + taskId + " results file was saved! all done!!!");
                exec('sudo pkill -f lagou/tasks.js', (error, stdout, stderr) => {
                    if (error) {
                        console.error(`exec error: ${error}`);
                        return;
                    }
                    // stderr output mentioning proxychains is tolerated; anything
                    // else is reported and the explicit exit is skipped.
                    if (stderr && stderr.indexOf('proxychains') == -1) {
                        console.error(`exec stderror: ${stderr}`);
                        return;
                    }
                    console.info('kill all node process!!!');
                    process.exit(0);
                });
            });
        });
    });
}
/**
 * Normalizes the `-r` command-line value into an ordered `[min, max]` pair.
 *
 * - missing / falsy value: `[1, 100000]`
 * - a single number `n`:   `n` paired with the default bound 100000
 * - a string "a,b[,...]":  the smallest and largest of the listed numbers
 *
 * The pair is always returned in ascending order, so "20,10" and "10,20"
 * are equivalent.
 *
 * @param {number|string|undefined} range_str - Raw option value.
 * @returns {number[]} Two-element array `[min, max]`.
 * @throws {RangeError} If the value cannot be parsed into finite numbers.
 */
function getCompanyIdRange(range_str) {
    var DEFAULT_UPPER_BOUND = 100000;
    var bounds;

    if (!range_str || typeof range_str === 'number') {
        bounds = [parseInt(range_str || 1, 10), DEFAULT_UPPER_BOUND];
    } else {
        // Parse the argument itself rather than reaching back into yargs.
        bounds = String(range_str).split(',').map(Number);
    }

    // Fail fast with a clear message instead of propagating NaN bounds.
    if (!bounds.every(function (value) { return Number.isFinite(value); })) {
        throw new RangeError('Invalid company id range: ' + range_str);
    }

    return [Math.min.apply(null, bounds), Math.max.apply(null, bounds)];
}