Skip to content
This repository has been archived by the owner on Jan 11, 2023. It is now read-only.

Commit

Permalink
Improve and test notebook parsing
Browse files Browse the repository at this point in the history
  • Loading branch information
samuelmeuli committed Mar 15, 2020
1 parent c05bc2f commit 30681fc
Show file tree
Hide file tree
Showing 4 changed files with 186 additions and 6 deletions.
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,6 @@ require (
github.com/pkg/errors v0.9.1 // indirect
github.com/russross/blackfriday/v2 v2.0.1
github.com/shurcooL/sanitized_anchor_name v1.0.0 // indirect
github.com/stretchr/testify v1.4.0 // indirect
github.com/stretchr/testify v1.5.1
golang.org/x/sys v0.0.0-20200302150141-5c8b2ff67527 // indirect
)
5 changes: 3 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -38,11 +38,12 @@ github.com/sergi/go-diff v1.0.0 h1:Kpca3qRNrduNnOQeazBd0ysaKrUJiIuISHxogkT9RPQ=
github.com/sergi/go-diff v1.0.0/go.mod h1:0CfEIISq7TuYL3j771MWULgwwjU+GofnZX9QAmXWZgo=
github.com/shurcooL/sanitized_anchor_name v1.0.0 h1:PdmoCO6wvbs+7yrJyMORt4/BmY5IYyJwS/kOiWx8mHo=
github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeVRXNmyL/1OwPU0+IJeTBvfc=
github.com/stretchr/objx v0.1.0 h1:4G4v2dO3VZwixGIRoQ5Lfboy6nUhCyYzaqnIAPPhYs4=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
github.com/stretchr/testify v1.4.0 h1:2E4SXV/wtOkTonXsotYi4li6zVWxYlZuYNCXe9XRJyk=
github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
github.com/stretchr/testify v1.5.1 h1:nOGnQDM7FYENwehXlg/kFVnos3rEvtKTjRvOWSzb6H4=
github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA=
golang.org/x/sys v0.0.0-20181128092732-4ed8d59d0b35 h1:YAFjXN64LMvktoUZH9zgY4lGc/msGN7HQfoSuKCgaDU=
golang.org/x/sys v0.0.0-20181128092732-4ed8d59d0b35/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20200116001909-b77594299b42/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
Expand Down
25 changes: 22 additions & 3 deletions parse.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,30 @@ import (
"encoding/json"
)

// Documentation of the Jupyter Notebook JSON format: https://ipython.org/ipython-doc/3/notebook/nbformat.html
// (VCS: https://github.com/ipython/ipython-doc/blob/e9c83570cf3dea6d7a6b178ee59869b4f441220f/3/notebook/nbformat.html)

// OutputData holds the representations of a single cell output, keyed in the
// notebook JSON by MIME type. Every field is optional: a representation that
// is absent from the JSON leaves its field nil, and nil fields are dropped
// again when the value is marshalled.
//
// Source of the MIME type list:
// https://github.com/jupyter/nbconvert/blob/c837a22d44d98f6a58d1934bd85af1506df48f21/nbconvert/utils/base.py#L16
//
// NOTE(review): the fields typed *string only decode from a JSON string. The
// nbformat documentation appears to allow multi-line values to be stored as a
// list of strings as well; if a notebook does that for e.g. "text/latex",
// decoding will return an error — confirm against real notebooks.
type OutputData struct {
	TextHTML       []string `json:"text/html,omitempty"`
	ApplicationPDF *string  `json:"application/pdf,omitempty"`
	TextLaTeX      *string  `json:"text/latex,omitempty"`
	ImageSVGXML    []string `json:"image/svg+xml,omitempty"`
	ImagePNG       *string  `json:"image/png,omitempty"`
	ImageJPEG      *string  `json:"image/jpeg,omitempty"`
	TextMarkdown   []string `json:"text/markdown,omitempty"`
	TextPlain      []string `json:"text/plain,omitempty"`
}

// Output is the result of a code cell's execution in a Jupyter Notebook
type Output struct {
Name string `json:"name"`
OutputType string `json:"output_type"`
Text []string `json:"text,omitempty"`
OutputType string `json:"output_type"`
ExecutionCount *int `json:"execution_count,omitempty"`
Text []string `json:"text,omitempty"`
Data OutputData `json:"data,omitempty"`
Traceback []string `json:"traceback,omitempty"`
// Omitted fields: "ename", "evalue", "name"
}

// Cell is a single Jupyter Notebook cell
Expand Down
160 changes: 160 additions & 0 deletions parse_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,160 @@
package nbtohtml

import (
"github.com/stretchr/testify/assert"
"testing"
)

// notebookString is the JSON fixture fed to parseNotebook by
// TestParseNotebook; the value it is expected to decode to is declared in the
// expected variable.
//
// The fixture declares nbformat 4 (minor 4) and contains three cells: a
// markdown cell, a code cell and a raw cell. The code cell carries one output
// of each of the types "stream", "display_data", "execute_result" and "error".
//
// Documentation of the Jupyter Notebook JSON format: https://ipython.org/ipython-doc/3/notebook/nbformat.html
// (VCS: https://github.com/ipython/ipython-doc/blob/e9c83570cf3dea6d7a6b178ee59869b4f441220f/3/notebook/nbformat.html)
const notebookString = `{
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 4,
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"some *markdown*"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true,
"autoscroll": false
},
"source": [
"some code"
],
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"multiline stream text"
]
},
{
"output_type": "display_data",
"data": {
"image/png": "base64-encoded-png-data"
},
"metadata": {
"image/png": {
"width": 640,
"height": 480
}
}
},
{
"output_type": "execute_result",
"execution_count": 42,
"data": {
"text/plain": [
"multiline text data"
]
}
},
{
"output_type": "error",
"ename": "Some error name",
"evalue": "Some error value",
"traceback": [
"Trace part 1",
"Trace part 2"
]
}
]
},
{
"cell_type": "raw",
"metadata": {
"format": "mime/type"
},
"source": [
"some nbformat mime-type data"
]
}
]
}`

var (
	// Addressable values for the pointer-typed fields of the expected result.
	base64PngString  = "base64-encoded-png-data"
	executionCount1  = 1
	executionCount42 = 42

	// expected is the Notebook that TestParseNotebook compares against the
	// result of parsing notebookString.
	expected = Notebook{
		Cells: []Cell{
			// Markdown cell.
			{
				CellType: "markdown",
				Source:   []string{"some *markdown*"},
			},
			// Code cell with one output per output type.
			{
				CellType:       "code",
				ExecutionCount: &executionCount1,
				Source:         []string{"some code"},
				Outputs: []Output{
					{
						OutputType: "stream",
						Text:       []string{"multiline stream text"},
					},
					{
						OutputType: "display_data",
						Data:       OutputData{ImagePNG: &base64PngString},
					},
					{
						OutputType:     "execute_result",
						ExecutionCount: &executionCount42,
						Data:           OutputData{TextPlain: []string{"multiline text data"}},
					},
					{
						OutputType: "error",
						Traceback:  []string{"Trace part 1", "Trace part 2"},
					},
				},
			},
			// Raw cell.
			{
				CellType: "raw",
				Source:   []string{"some nbformat mime-type data"},
			},
		},
		Metadata: Metadata{
			LanguageInfo: LanguageInfo{FileExtension: ".py"},
		},
	}
)

// TestParseNotebook parses the notebookString fixture and checks that parsing
// reports no error and yields exactly the expected Notebook value.
func TestParseNotebook(t *testing.T) {
	got, parseErr := parseNotebook(notebookString)

	assert.NoError(t, parseErr)
	assert.Equal(t, expected, got)
}

0 comments on commit 30681fc

Please sign in to comment.