-
Notifications
You must be signed in to change notification settings - Fork 19
/
crawluri.thrift
56 lines (39 loc) · 1.32 KB
/
crawluri.thrift
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
# Description of the CrawlUri thrift structure
namespace py spyder.thrift.gen
/**
 * Type aliases used by the CrawlUri struct to keep its field list readable.
 */

// 64-bit integer alias; used for the begin_processing / end_processing fields
typedef i64 timestamp

// string-to-string map alias; used for the http request and response headers
typedef map<string, string> header

// string-to-string map alias; used for the free-form optional_vars field
typedef map<string, string> key_value
/**
 * The main struct for CrawlUris.
 *
 * This contains some metadata and, if possible, the saved web page.
 *
 * The numeric field ids (1-12) identify the fields in the serialized form,
 * so they are kept exactly as they were.
 */
struct CrawlUri {
    // readable version of the url to crawl
    1: string url,

    // the effective url used for downloading the content (i.e. IP instead of hostname)
    2: string effective_url,

    // the current priority of this uri
    // NOTE(review): the comment previously attached here described a "host
    // identifier used for queue selection", which matches no field in this
    // struct; how the priority is consumed is not visible here - confirm.
    3: i16 current_priority,

    // when processing has been started
    4: timestamp begin_processing,

    // when processing is finished
    5: timestamp end_processing,

    // the http request headers
    6: header req_header,

    // the http response headers
    7: header rep_header,

    // the saved content body
    // NOTE(review): declared as string; if bodies can be arbitrary bytes,
    // confirm this is intended rather than binary.
    8: string content_body,

    // the server's status code
    9: i16 status_code,

    // request time (unit not specified in this file)
    10: double req_time,

    // queue time (unit not specified in this file)
    11: double queue_time,

    // additional values from other processors
    12: key_value optional_vars
}