Skip to content
This repository has been archived by the owner on Jan 11, 2023. It is now read-only.

Commit

Permalink
Improve kernel language identification
Browse files Browse the repository at this point in the history
Metadata fields in the Jupyter Notebook JSON are optional. Therefore, multiple JSON fields are now checked for hints about the kernel language. If no information is found, Chroma will try to infer the language from the file content.
  • Loading branch information
samuelmeuli committed Mar 24, 2020
1 parent 582a872 commit 74d502b
Show file tree
Hide file tree
Showing 4 changed files with 44 additions and 17 deletions.
28 changes: 18 additions & 10 deletions convert.go
Original file line number Diff line number Diff line change
Expand Up @@ -84,9 +84,9 @@ func convertMarkdownCell(cell cell) template.HTML {
}

// convertCodeCell converts a code cell to HTML with classes for syntax highlighting.
func convertCodeCell(cell cell, fileExtension string) template.HTML {
func convertCodeCell(cell cell, languageID string) template.HTML {
sourceString := strings.Join(cell.Source, "")
cellHTML, err := renderSourceCode(sourceString, fileExtension)
cellHTML, err := renderSourceCode(sourceString, languageID)

// Render code without syntax highlighting if an error occurred
if err != nil {
Expand Down Expand Up @@ -116,12 +116,12 @@ func convertPrompt(executionCount *int) template.HTML {
}

// convertInput converts the provided cell input to HTML.
func convertInput(fileExtension string, cell cell) template.HTML {
func convertInput(languageID string, cell cell) template.HTML {
switch cell.CellType {
case "markdown":
return convertMarkdownCell(cell)
case "code":
return convertCodeCell(cell, fileExtension)
return convertCodeCell(cell, languageID)
case "raw":
return convertRawCell(cell)
default:
Expand Down Expand Up @@ -187,8 +187,16 @@ func ConvertString(writer io.Writer, notebookString string) error {
)
}

// Get format extension of Jupyter Kernel language (e.g. "py")
fileExtension := notebook.Metadata.LanguageInfo.FileExtension[1:]
// Try to find information about programming language used by the notebook kernel. Metadata fields
// in the Jupyter Notebook JSON are optional, so multiple fields are checked
languageID := ""
if fileExtensionPtr := notebook.Metadata.LanguageInfo.FileExtension; fileExtensionPtr != nil {
languageID = (*fileExtensionPtr)[1:]
} else if kernelLanguagePtr := notebook.Metadata.KernelSpec.Language; kernelLanguagePtr != nil {
languageID = *kernelLanguagePtr
} else if kernelNamePtr := notebook.Metadata.KernelSpec.Name; kernelNamePtr != nil {
languageID = *kernelNamePtr
}

t := template.New("notebook")
t = t.Funcs(template.FuncMap{
Expand All @@ -205,15 +213,15 @@ func ConvertString(writer io.Writer, notebookString string) error {
})
t, err = t.Parse(`
<div class="notebook">
{{ $fileExtension := .fileExtension }}
{{ $languageID := .languageID }}
{{ range .notebook.Cells }}
<div class="{{ . | getCellClasses }}">
<div class="input-wrapper">
<div class="input-prompt">
{{ .ExecutionCount | convertPrompt }}
</div>
<div class="input">
{{ . | convertInput $fileExtension }}
{{ . | convertInput $languageID }}
</div>
</div>
{{ range .Outputs }}
Expand All @@ -235,8 +243,8 @@ func ConvertString(writer io.Writer, notebookString string) error {
}

templateVars := map[string]interface{}{
"fileExtension": fileExtension,
"notebook": notebook,
"languageID": languageID,
"notebook": notebook,
}
return t.Execute(writer, templateVars)
}
13 changes: 10 additions & 3 deletions parse.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,15 +41,22 @@ type cell struct {

// languageInfo provides details about the programming language of the Jupyter Notebook kernel.
type languageInfo struct {
FileExtension string `json:"file_extension"`
// Omitted fields: codemirror_mode", "mimetype", "name", "nbconvert_exporter", "pygments_lexer",
FileExtension *string `json:"file_extension,omitempty"`
// Omitted fields: "codemirror_mode", "mimetype", "name", "nbconvert_exporter", "pygments_lexer",
// "version"
}

// kernelSpec provides details about the Jupyter Notebook kernel.
//
// Every field is a pointer tagged with omitempty, so a key that is missing from the notebook JSON
// is left as nil instead of being indistinguishable from an empty string.
type kernelSpec struct {
// DisplayName maps to the "display_name" JSON key.
DisplayName *string `json:"display_name,omitempty"`
// Language maps to the "language" JSON key.
Language *string `json:"language,omitempty"`
// Name maps to the "name" JSON key.
Name *string `json:"name,omitempty"`
}

// metadata contains additional information about the Jupyter Notebook.
type metadata struct {
LanguageInfo languageInfo `json:"language_info"`
// Omitted fields: "kernelspec"
KernelSpec kernelSpec `json:"kernelspec"`
}

// notebook represents the JSON data structure in which a Jupyter Notebook is stored.
Expand Down
9 changes: 6 additions & 3 deletions render.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,14 @@ func renderMarkdown(markdownLines []string) template.HTML {
// renderSourceCode uses the Chroma library to convert the provided source code string to HTML.
// Instead of inline styles, HTML classes are used for syntax highlighting, which allows the users
// to style source code according to their needs.
func renderSourceCode(source string, fileExtension string) (template.HTML, error) {
func renderSourceCode(source string, languageID string) (template.HTML, error) {
sourceBuffer := new(bytes.Buffer)

// Set up lexer for file extension
l := lexers.Get(fileExtension)
// Set up lexer for programming language
var l chroma.Lexer
if languageID != "" {
l = lexers.Get(languageID)
}
if l == nil {
l = lexers.Analyse(source)
}
Expand Down
11 changes: 10 additions & 1 deletion test_data.go
Original file line number Diff line number Diff line change
Expand Up @@ -383,9 +383,18 @@ var testRawCellCodeInjection = cell{
},
}

// Kernel fixture values. They are declared as package-level variables (rather than inline
// literals) so that their addresses can be taken where pointer-typed metadata fields are filled in.
var (
	testFileExtension     = ".py"
	testKernelLanguage    = "python"
	testKernelDisplayName = "Python 3"
	testKernelName        = "python3"
)
var testMetadata = metadata{
LanguageInfo: languageInfo{
FileExtension: ".py",
FileExtension: &testFileExtension,
},
KernelSpec: kernelSpec{
DisplayName: &testKernelDisplayName,
Language: &testKernelLanguage,
Name: &testKernelName,
},
}

Expand Down

0 comments on commit 74d502b

Please sign in to comment.