-
Notifications
You must be signed in to change notification settings - Fork 64
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #71 from abieiden/main
Print Book Functionality
- Loading branch information
Showing
80 changed files
with
192 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,82 @@ | ||
import base64 | ||
import json | ||
import logging | ||
import time | ||
from io import BytesIO | ||
from typing import List | ||
|
||
from selenium import webdriver | ||
from selenium.webdriver.chrome.service import Service as ChromeService | ||
from selenium.webdriver.chrome.options import Options as ChromeOptions | ||
from webdriver_manager.chrome import ChromeDriverManager | ||
|
||
class PdfGenerator:
    """Render a list of web pages to PDF using headless Chrome.

    Simple use case:
        pdf_file = PdfGenerator(['https://google.com']).main()
        with open('new_pdf.pdf', "wb") as outfile:
            outfile.write(pdf_file[0].getbuffer())

    Code by: Nikita Tonkoshkur
    https://medium.com/@nikitatonkoshkur25/create-pdf-from-webpage-in-python-1e9603d6a430
    """

    # WebDriver used by the private helpers; main() creates it and tears it
    # down again, so it is None outside of a main() call.
    driver = None
    # Seconds to wait after navigation before printing, so the page can
    # finish loading. Increase for slow pages.
    load_wait_seconds = 1
    # https://chromedevtools.github.io/devtools-protocol/tot/Page#method-printToPDF
    print_options = {
        'landscape': False,
        'displayHeaderFooter': False,
        'printBackground': True,
        'preferCSSPageSize': True,
    }

    def __init__(self, urls: List[str]):
        """Remember the URLs that main() will render, in order."""
        self.urls = urls

    def _get_pdf_from_url(self, url, *args, **kwargs):
        """Navigate the driver to ``url`` and return the printed PDF as bytes.

        Extra positional/keyword arguments are accepted and ignored.
        """
        self.driver.get(url)

        time.sleep(self.load_wait_seconds)  # allow the page to load

        # Copy so the DevTools call can never mutate the shared class dict.
        print_options = self.print_options.copy()
        result = self._send_devtools(self.driver, "Page.printToPDF", print_options)
        # Page.printToPDF returns the document base64-encoded under 'data'.
        return base64.b64decode(result['data'])

    @staticmethod
    def _send_devtools(driver, cmd, params):
        """Send a Chrome DevTools Protocol command and return its result.

        Works only with chromedriver. Uses the driver's public
        ``execute_cdp_cmd`` when it is available; otherwise falls back to
        posting to chromedriver's ``send_command_and_get_result`` endpoint
        through the command executor's private helpers.
        """
        execute_cdp_cmd = getattr(driver, "execute_cdp_cmd", None)
        if callable(execute_cdp_cmd):
            return execute_cdp_cmd(cmd, params)

        # Legacy path: relies on private attributes of the command executor.
        resource = "/session/%s/chromium/send_command_and_get_result" % driver.session_id
        url = driver.command_executor._url + resource
        body = json.dumps({'cmd': cmd, 'params': params})
        response = driver.command_executor._request('POST', url, body)
        return response.get('value')

    def _generate_pdfs(self):
        """Return one BytesIO per URL in ``self.urls``, in the same order.

        Each buffer's position is left at the end of the written data, so
        read the content with ``getbuffer()`` or ``getvalue()``.
        """
        pdf_files = []

        for url in self.urls:
            buffer = BytesIO()
            buffer.write(self._get_pdf_from_url(url))
            pdf_files.append(buffer)

        return pdf_files

    def main(self) -> List[BytesIO]:
        """Start headless Chrome, render every URL, and shut Chrome down.

        Returns:
            The PDFs as in-memory buffers, one per URL.

        Any exception raised while starting the driver or rendering a page
        propagates to the caller after cleanup.
        """
        webdriver_options = ChromeOptions()
        webdriver_options.add_argument('--headless')
        webdriver_options.add_argument('--disable-gpu')

        try:
            self.driver = webdriver.Chrome(
                service=ChromeService(ChromeDriverManager().install()),
                options=webdriver_options,
            )
            return self._generate_pdfs()
        finally:
            # The driver is still None when webdriver.Chrome() itself failed;
            # guarding here keeps that original error visible.
            if self.driver is not None:
                # quit() ends the whole session (browser and chromedriver),
                # whereas close() would only close the current window.
                self.driver.quit()
                self.driver = None
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,109 @@ | ||
import hjson | ||
import PyPDF2 | ||
import pdfgenerator | ||
import re | ||
import os | ||
|
||
# Build the book as a single PDF with a table of contents:
#   1. read the outline from ../bookOutline.hjson,
#   2. render every listed page to pdfs/<page>.pdf (skipping files that
#      already exist, so an interrupted run can be resumed),
#   3. merge the title page and all page PDFs into CS109Book.pdf, adding one
#      outline entry per part, section and example.


def _ensure_pdf(pdf_name, pdf_link):
    """Render ``pdf_link`` to ``pdfs/<pdf_name>`` unless that file exists."""
    target = os.path.join('pdfs', pdf_name)
    if os.path.exists(target):
        return
    # generate pdf file
    rendered = pdfgenerator.PdfGenerator([pdf_link]).main()
    # save pdf to file
    with open(target, "wb") as outfile:
        outfile.write(rendered[0].getbuffer())


def _collect_pages(pages, link_prefix):
    """Make sure every page in ``pages`` has a PDF; return {pdf_name: title}.

    ``pages`` maps a page file name to its title; the page URL is
    ``link_prefix`` followed by that file name.
    """
    collected = {}
    for page, title in pages.items():
        pdf_name = page + '.pdf'
        # store pdf_name and title
        collected[pdf_name] = title
        _ensure_pdf(pdf_name, link_prefix + page)
    return collected


def _append_pages(writer, pages, parent):
    """Append each PDF in ``pages`` to ``writer`` with an outline entry.

    The outline entry is created first and targets the index of the next
    page to be appended, i.e. the first page of the PDF that follows.
    """
    for pdf_name, title in pages.items():
        writer.add_outline_item(title, len(writer.pages), parent=parent)
        writer.append(os.path.join('pdfs', pdf_name))


# Load the outline; the context manager closes the file even if parsing fails.
with open("../bookOutline.hjson") as f:
    data = hjson.load(f)

# folder to store pdfs
os.makedirs('pdfs', exist_ok=True)

# base url for all pages
base = 'https://chrispiech.github.io/probabilityForComputerScientists/en/'

# get pdf for title page
title_name = 'titlepage.pdf'
_ensure_pdf(title_name, base + 'index.html')

# get pdf_name and title for every page in bookOutline and store in pdf_files
pdf_files = {}

for part in data:
    pdf_files[part] = {
        'sections': _collect_pages(data[part]['sections'], base + part + '/'),
    }
    if 'examples' in data[part]:
        # examples of every part live under the shared 'examples/' path
        pdf_files[part]['examples'] = _collect_pages(
            data[part]['examples'], base + 'examples' + '/'
        )

# Output PDF file name
output_pdf = "CS109Book.pdf"

# Create a PDF file writer object
pdf_writer = PyPDF2.PdfWriter()

# add title page
pdf_writer.append(os.path.join('pdfs', title_name))

for part in pdf_files:
    title = data[part].get('title')
    if title is None:
        title = "Introduction"
    # create outline for parts; len(pdf_writer.pages) is the index of the
    # next page to be appended, so this stays correct even when the title
    # page (or any earlier PDF) spans more than one page
    part_outline = pdf_writer.add_outline_item(title, len(pdf_writer.pages))
    # add pdf files to table of contents and book
    _append_pages(pdf_writer, pdf_files[part]['sections'], part_outline)
    if 'examples' in pdf_files[part]:
        # create outline for examples
        examples_outline = pdf_writer.add_outline_item(
            'Applications', len(pdf_writer.pages), parent=part_outline
        )
        _append_pages(pdf_writer, pdf_files[part]['examples'], examples_outline)

# Save the merged PDF with the TOC
with open(output_pdf, "wb") as output_file:
    pdf_writer.write(output_file)

print(f"Merged PDF with Table of Contents saved as {output_pdf}")