Skip to content

Commit

Permalink
Merge pull request #71 from abieiden/main
Browse files Browse the repository at this point in the history
Print Book Functionality
  • Loading branch information
chrispiech committed Oct 20, 2023
2 parents 6b6e7a3 + 66a957f commit d45186b
Show file tree
Hide file tree
Showing 80 changed files with 192 additions and 1 deletion.
2 changes: 1 addition & 1 deletion chapters/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ <h1>Course Reader for CS109</h1>
</p>

<p><center><a class="btn btn-primary btn-lg" href="{{pathToLang}}part1/counting" role="button">Get Started</a></center></p>

<p><center><a href="../print/CS109Book.pdf">Click to Download the CS109 Book</a></center></p>
<div class="alert alert-primary"><b>Notable Updates Fall 2023</b>:
<ol>
<li><a href="{{pathToLang}}part1/prob_or">General Inclusion-Exclusion</a>. <i>Oct 7th 2023</i></li>
Expand Down
Binary file added print/CS109Book.pdf
Binary file not shown.
82 changes: 82 additions & 0 deletions print/pdfgenerator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
import base64
import json
import logging
import time
from io import BytesIO
from typing import List

from selenium import webdriver
from selenium.webdriver.chrome.service import Service as ChromeService
from selenium.webdriver.chrome.options import Options as ChromeOptions
from webdriver_manager.chrome import ChromeDriverManager

class PdfGenerator:
    """
    Render web pages to PDF documents with headless Chrome.

    Simple use case:
        pdf_file = PdfGenerator(['https://google.com']).main()
        with open('new_pdf.pdf', "wb") as outfile:
            outfile.write(pdf_file[0].getbuffer())

    Code by: Nikita Tonkoshkur
    https://medium.com/@nikitatonkoshkur25/create-pdf-from-webpage-in-python-1e9603d6a430
    """
    # Active Chrome WebDriver; only set while main() is running.
    driver = None
    # https://chromedevtools.github.io/devtools-protocol/tot/Page#method-printToPDF
    print_options = {
        'landscape': False,
        'displayHeaderFooter': False,
        'printBackground': True,
        'preferCSSPageSize': True,
    }

    def __init__(self, urls: List[str]):
        """Store the list of page URLs that main() will render."""
        self.urls = urls

    def _get_pdf_from_url(self, url, *args, **kwargs):
        """Load *url* in the active driver and return the printed PDF as bytes.

        Extra positional/keyword arguments are accepted and ignored.
        """
        self.driver.get(url)

        time.sleep(1)  # allow the page to load, increase if needed

        # Copy so the shared class-level options dict is never handed out.
        print_options = self.print_options.copy()
        result = self._send_devtools(self.driver, "Page.printToPDF", print_options)
        # The DevTools response carries the PDF base64-encoded under 'data'.
        return base64.b64decode(result['data'])

    @staticmethod
    def _send_devtools(driver, cmd, params):
        """
        Works only with chromedriver.
        Method uses chromedriver's api to pass various commands to it.

        POSTs ``{'cmd': cmd, 'params': params}`` to the session's
        ``send_command_and_get_result`` endpoint and returns the ``'value'``
        entry of the response.
        """
        resource = "/session/%s/chromium/send_command_and_get_result" % driver.session_id
        url = driver.command_executor._url + resource
        body = json.dumps({'cmd': cmd, 'params': params})
        response = driver.command_executor._request('POST', url, body)
        return response.get('value')

    def _generate_pdfs(self):
        """Render every URL in ``self.urls``; return one BytesIO per URL, in order."""
        pdf_files = []

        for url in self.urls:
            result = self._get_pdf_from_url(url)
            file = BytesIO()
            file.write(result)
            pdf_files.append(file)

        return pdf_files

    def main(self) -> List[BytesIO]:
        """Start headless Chrome, render all URLs to PDF, and shut Chrome down.

        Returns:
            One in-memory PDF (BytesIO) per URL, in the order of ``self.urls``.

        Any exception raised while starting Chrome or rendering a page
        propagates unchanged; the browser is shut down first if it was started.
        """
        webdriver_options = ChromeOptions()
        webdriver_options.add_argument('--headless')
        webdriver_options.add_argument('--disable-gpu')

        try:
            self.driver = webdriver.Chrome(
                service=ChromeService(ChromeDriverManager().install()),
                options=webdriver_options
            )
            result = self._generate_pdfs()
        finally:
            # The driver is still None when Chrome failed to start; calling a
            # method on it would mask the original error with AttributeError.
            driver, self.driver = self.driver, None
            if driver is not None:
                # quit() ends the whole session (browser and chromedriver
                # process); close() would only close the current window.
                driver.quit()

        return result
Binary file added print/pdfs/100_binomial_problems.pdf
Binary file not shown.
Binary file added print/pdfs/algorithmic_analysis.pdf
Binary file not shown.
Binary file added print/pdfs/all_distributions.pdf
Binary file not shown.
Binary file added print/pdfs/bacteria_evolution.pdf
Binary file not shown.
Binary file added print/pdfs/bayes_theorem.pdf
Binary file not shown.
Binary file added print/pdfs/bayesian_carbon_dating.pdf
Binary file not shown.
Binary file added print/pdfs/bayesian_networks.pdf
Binary file not shown.
Binary file added print/pdfs/bernoulli.pdf
Binary file not shown.
Binary file added print/pdfs/beta.pdf
Binary file not shown.
Binary file added print/pdfs/binomial.pdf
Binary file not shown.
Binary file added print/pdfs/binomial_approx.pdf
Binary file not shown.
Binary file added print/pdfs/bootstrapping.pdf
Binary file not shown.
Binary file added print/pdfs/bridge_distribution.pdf
Binary file not shown.
Binary file added print/pdfs/calculators.pdf
Binary file not shown.
Binary file added print/pdfs/clt.pdf
Binary file not shown.
Binary file added print/pdfs/combinatorics.pdf
Binary file not shown.
Binary file added print/pdfs/computational_inference.pdf
Binary file not shown.
Binary file added print/pdfs/cond_prob.pdf
Binary file not shown.
Binary file added print/pdfs/continuous.pdf
Binary file not shown.
Binary file added print/pdfs/continuous_joint.pdf
Binary file not shown.
Binary file added print/pdfs/core_probability_ref.pdf
Binary file not shown.
Binary file added print/pdfs/correlation.pdf
Binary file not shown.
Binary file added print/pdfs/counting.pdf
Binary file not shown.
Binary file added print/pdfs/curse_of_dimensionality.pdf
Binary file not shown.
Binary file added print/pdfs/dart_logo.pdf
Binary file not shown.
Binary file added print/pdfs/differential_privacy.pdf
Binary file not shown.
Binary file added print/pdfs/digital_vision_test.pdf
Binary file not shown.
Binary file added print/pdfs/enigma.pdf
Binary file not shown.
Binary file added print/pdfs/equally_likely.pdf
Binary file not shown.
Binary file added print/pdfs/expectation.pdf
Binary file not shown.
Binary file added print/pdfs/exponential.pdf
Binary file not shown.
Binary file added print/pdfs/fairness.pdf
Binary file not shown.
Binary file added print/pdfs/federalist.pdf
Binary file not shown.
Binary file added print/pdfs/grades_not_normal.pdf
Binary file not shown.
Binary file added print/pdfs/grading_eye_inflammation.pdf
Binary file not shown.
Binary file added print/pdfs/independence.pdf
Binary file not shown.
Binary file added print/pdfs/independent_vars.pdf
Binary file not shown.
Binary file added print/pdfs/inference.pdf
Binary file not shown.
Binary file added print/pdfs/joint.pdf
Binary file not shown.
Binary file added print/pdfs/jury.pdf
Binary file not shown.
Binary file added print/pdfs/law_total.pdf
Binary file not shown.
Binary file added print/pdfs/log_probabilities.pdf
Binary file not shown.
Binary file added print/pdfs/log_regression.pdf
Binary file not shown.
Binary file added print/pdfs/machine_learning.pdf
Binary file not shown.
Binary file added print/pdfs/many_flips.pdf
Binary file not shown.
Binary file added print/pdfs/map.pdf
Binary file not shown.
Binary file added print/pdfs/mixture_models.pdf
Binary file not shown.
Binary file added print/pdfs/mle.pdf
Binary file not shown.
Binary file added print/pdfs/mle_demo.pdf
Binary file not shown.
Binary file added print/pdfs/mle_pareto.pdf
Binary file not shown.
Binary file added print/pdfs/multinomial.pdf
Binary file not shown.
Binary file added print/pdfs/naive_bayes.pdf
Binary file not shown.
Binary file added print/pdfs/name2age.pdf
Binary file not shown.
Binary file added print/pdfs/night_sight.pdf
Binary file not shown.
Binary file added print/pdfs/normal.pdf
Binary file not shown.
Binary file added print/pdfs/notation.pdf
Binary file not shown.
Binary file added print/pdfs/p_hacking.pdf
Binary file not shown.
Binary file added print/pdfs/parameter_estimation.pdf
Binary file not shown.
Binary file added print/pdfs/pmf.pdf
Binary file not shown.
Binary file added print/pdfs/poisson.pdf
Binary file not shown.
Binary file added print/pdfs/prob_and.pdf
Binary file not shown.
Binary file added print/pdfs/prob_baby_delivery.pdf
Binary file not shown.
Binary file added print/pdfs/prob_or.pdf
Binary file not shown.
Binary file added print/pdfs/probability.pdf
Binary file not shown.
Binary file added print/pdfs/python.pdf
Binary file not shown.
Binary file added print/pdfs/random_shuffles.pdf
Binary file not shown.
Binary file added print/pdfs/rvs.pdf
Binary file not shown.
Binary file added print/pdfs/samples.pdf
Binary file not shown.
Binary file added print/pdfs/serendipity.pdf
Binary file not shown.
Binary file added print/pdfs/summation_vars.pdf
Binary file not shown.
Binary file added print/pdfs/thompson.pdf
Binary file not shown.
Binary file added print/pdfs/titlepage.pdf
Binary file not shown.
Binary file added print/pdfs/tracking_in_2D.pdf
Binary file not shown.
Binary file added print/pdfs/uniform.pdf
Binary file not shown.
Binary file added print/pdfs/variance.pdf
Binary file not shown.
Binary file added print/pdfs/winning_series.pdf
Binary file not shown.
109 changes: 109 additions & 0 deletions print/printbook.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
import hjson
import PyPDF2
import pdfgenerator
import re
import os

# Build the printable CS109 book: render every page listed in bookOutline.hjson
# to its own PDF under pdfs/ (skipping ones already rendered), then merge them
# into a single PDF with a nested outline (table of contents).
# Paths are relative, so the script is expected to run from its own directory.


def _ensure_pdf(pdf_link, pdf_name):
    """Render *pdf_link* to pdfs/<pdf_name> unless that file already exists."""
    pdf_path = os.path.join('pdfs', pdf_name)
    if os.path.exists(pdf_path):
        return
    # generate pdf file
    pdf_file = pdfgenerator.PdfGenerator([pdf_link]).main()
    # save pdf to file
    with open(pdf_path, "wb") as outfile:
        outfile.write(pdf_file[0].getbuffer())


def _page_count(pdf_path):
    """Return the number of pages in the PDF at *pdf_path*, closing the file afterwards."""
    with open(pdf_path, "rb") as pdf_stream:
        return len(PyPDF2.PdfReader(pdf_stream).pages)


def _append_pages(writer, pages, parent, page_num):
    """Append each PDF in *pages* to *writer* with an outline item under *parent*.

    *pages* maps pdf file name -> outline title; *page_num* is the 0-based
    index at which the first appended PDF starts. Returns the index at which
    the next PDF will start.
    """
    for pdf_name, title in pages.items():
        pdf_path = os.path.join('pdfs', pdf_name)
        # Create an outline item pointing at the first page of this pdf
        writer.add_outline_item(title, page_num, parent=parent)
        # add pdf file to book pdf
        writer.append(pdf_path)
        # start of next pdf
        page_num += _page_count(pdf_path)
    return page_num


# Load the book outline; Hjson files are UTF-8, so do not depend on the locale.
with open("../bookOutline.hjson", encoding="utf-8") as f:
    data = hjson.load(f)

# folder to store pdfs
os.makedirs('pdfs', exist_ok=True)

# base url for all pages
base = 'https://chrispiech.github.io/probabilityForComputerScientists/en/'

# get pdf for title page
pdf_link = base + 'index.html'
title_name = 'titlepage.pdf'
_ensure_pdf(pdf_link, title_name)

# get pdf_name and title for every page from bookOutline and store in pdf_files
pdf_files = {}

for part in data:
    pdf_files[part] = {'sections': {}}
    for page, title in data[part]['sections'].items():
        pdf_name = page + '.pdf'
        # store pdf_name and title
        pdf_files[part]['sections'][pdf_name] = title
        _ensure_pdf(base + part + '/' + page, pdf_name)
    if 'examples' in data[part]:
        pdf_files[part]['examples'] = {}
        for page, title in data[part]['examples'].items():
            pdf_name = page + '.pdf'
            # store pdf_name and title
            pdf_files[part]['examples'][pdf_name] = title
            # example pages live under a shared 'examples/' path, not the part
            _ensure_pdf(base + 'examples' + '/' + page, pdf_name)

# Output PDF file name
output_pdf = "CS109Book.pdf"

# Create a PDF file writer object
pdf_writer = PyPDF2.PdfWriter()

# add title page
title_path = os.path.join('pdfs', title_name)
pdf_writer.append(title_path)

# 0-based index of the next page to be appended. Derived from the title PDF's
# real length so the outline stays correct if it spans more than one page.
page_num = _page_count(title_path)
for part in pdf_files:
    title = data[part]['title']
    if title is None:
        title = "Introduction"
    # create outline for parts
    part_outline = pdf_writer.add_outline_item(title, page_num)
    # add section pdf files to table of contents and book
    page_num = _append_pages(
        pdf_writer, pdf_files[part]['sections'], part_outline, page_num
    )
    if 'examples' in pdf_files[part]:
        # create outline for examples
        examples_outline = pdf_writer.add_outline_item(
            'Applications', page_num, parent=part_outline
        )
        # add example pdf files to table of contents and book
        page_num = _append_pages(
            pdf_writer, pdf_files[part]['examples'], examples_outline, page_num
        )

# Save the merged PDF with the TOC
with open(output_pdf, "wb") as output_file:
    pdf_writer.write(output_file)

print(f"Merged PDF with Table of Contents saved as {output_pdf}")

0 comments on commit d45186b

Please sign in to comment.