forked from kchodorow/blook
-
Notifications
You must be signed in to change notification settings - Fork 0
/
blook.py
52 lines (42 loc) · 1.45 KB
/
blook.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
import argparse
from cache import Cache, PageNotFoundError
from ebook import Ebook
from filters import base
class Blook(object):
    """Command-line front end: parses arguments and drives Cache and Ebook."""

    # Printed when a filter for the requested site cannot be found.
    # Kept flush-left because the text is emitted verbatim.
    _UNPARSABLE_TEMPLATE = """
ERROR: Blook could not figure out how to parse {url}.
To add support for downloading this blog, please create an issue at
https://github.com/kchodorow/blook/issues with the following title:
{msg} for {url}
Blook created a file called 'unparsable.html' in this directory, which contains
the HTML it didn't recognize. Please attach it to the GitHub issue.
"""

    @staticmethod
    def _error_text(error):
        """Return a printable message for ``error``.

        Prefers the exception's ``message`` attribute when it has one and
        falls back to ``str(error)``, because built-in exceptions only carry
        ``message`` on Python 2.
        """
        return getattr(error, 'message', str(error))

    def _sanitize_url(self, url):
        """Normalize a user-supplied URL.

        Prepends ``https://`` unless the value already starts with an
        ``http://`` or ``https://`` scheme (compared case-insensitively), then
        strips every trailing slash.

        Args:
            url: The URL or bare host/path given on the command line.

        Returns:
            The normalized URL string.
        """
        # Match the full scheme separator, not just the letters "http", so
        # hosts such as "httpbin.org" still get a scheme prepended.
        if not url.lower().startswith(('http://', 'https://')):
            url = 'https://%s' % url
        return url.rstrip('/')

    def run(self):
        """Parse command-line arguments and carry out the requested action.

        With ``--clean_cache`` the cache for the URL is cleaned and nothing
        else happens. Otherwise an Ebook is assembled for the URL and its
        title and filename are printed. FilterNotFoundError and
        PageNotFoundError are reported on stdout instead of propagating.

        Raises:
            SystemExit: With status 1 when the URL argument is empty.
        """
        parser = argparse.ArgumentParser()
        parser.add_argument("url", help="URL to download")
        parser.add_argument(
            "--limit", type=int, default=0,
            help="Max number of articles to download")
        parser.add_argument('--clean_cache', action='store_true')
        args = parser.parse_args()

        # argparse already rejects a missing positional; this guards against
        # an explicitly empty string.
        if not args.url:
            parser.print_help()
            raise SystemExit(1)

        url = self._sanitize_url(args.url)
        if args.clean_cache:
            Cache(url).clean()
            return

        ebook = Ebook(url, args.limit, Cache(url))
        try:
            ebook.assemble()
            print("Wrote %s to %s" % (ebook.get_title(), ebook.get_filename()))
        except base.FilterNotFoundError as e:
            print(self._UNPARSABLE_TEMPLATE.format(
                url=url, msg=self._error_text(e)))
        except PageNotFoundError as e:
            print(self._error_text(e))
# Run the command-line tool only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    blook = Blook()
    blook.run()