Skip to content

Commit

Permalink
Added fixes noted in PR #205 comments.
Browse files Browse the repository at this point in the history
  • Loading branch information
PokkeFe committed Jun 21, 2021
1 parent fa06f53 commit 5b5bf75
Show file tree
Hide file tree
Showing 5 changed files with 45 additions and 47 deletions.
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ numpy>=1.17
pandas>=1.0.3
pyarrow>=1.0.0
regex
# TODO: The following dependency is to support compatibility with Python 3.6, and should be removed when that version's support is dropped
importlib_resources
# TODO: The following dependency should go away when we switch to Python 3.8.
memoized-property
Expand Down
27 changes: 12 additions & 15 deletions text_extensions_for_pandas/jupyter.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,9 @@
import numpy as np
import time
from typing import *
import text_extensions_for_pandas.resources as resources
import text_extensions_for_pandas.resources

# TODO: This try/except block is for Python 3.6 support, and should be reduced to just importing importlib.resources when 3.6 support is dropped.
try:
import importlib.resources as pkg_resources
except ImportError:
Expand Down Expand Up @@ -87,14 +88,12 @@ def _get_sanitized_doctext(column: Union["SpanArray", "TokenSpanArray"]) -> List

text_pieces = []
for i in range(len(text)):
if text[i] == "`":
text_pieces.append("\\`")
if text[i] == "'":
text_pieces.append("\\'")
else:
text_pieces.append(text[i])
return "".join(text_pieces)

_spanarray_instance_counter = 0

def pretty_print_html(column: Union["SpanArray", "TokenSpanArray"],
show_offsets: bool) -> str:
"""
Expand All @@ -112,9 +111,6 @@ def pretty_print_html(column: Union["SpanArray", "TokenSpanArray"],
raise TypeError(f"Expected SpanArray or TokenSpanArray, but received "
f"{column} of type {type(column)}")

global _spanarray_instance_counter
_spanarray_instance_counter += 1

# Get a javascript representation of the column
span_array = []
for e in column:
Expand All @@ -124,25 +120,26 @@ def pretty_print_html(column: Union["SpanArray", "TokenSpanArray"],
style_text = ""
script_text = ""

style_text = pkg_resources.read_text(resources, "span_array.css")
script_text = pkg_resources.read_text(resources, "span_array.js")
style_text = pkg_resources.read_text(text_extensions_for_pandas.resources, "span_array.css")
script_text = pkg_resources.read_text(text_extensions_for_pandas.resources, "span_array.js")

return textwrap.dedent(f"""
<div class="span-array" data-instance="{_spanarray_instance_counter}">
<div class="span-array">
If you're reading this message, your notebook viewer does not support Javascript execution. Try pasting the URL into a service like nbviewer.
</div>
<style>
{textwrap.indent(style_text, " ")}
{textwrap.indent(style_text, ' ')}
</style>
<script>
{{
{textwrap.indent(script_text, " ")}
{textwrap.indent(script_text, ' ')}
const Entry = window.SpanArray.Entry
const render = window.SpanArray.render
const spanArray = [{','.join(span_array)}]
const entries = Entry.fromSpanArray(spanArray)
const doc_text = `{_get_sanitized_doctext(column)}`
render(doc_text, entries, {_spanarray_instance_counter}, {'true' if show_offsets else 'false'})
const doc_text = '{_get_sanitized_doctext(column)}'
const script_context = document.currentScript
render(doc_text, entries, {'true' if show_offsets else 'false'}, script_context)
}}
</script>
""")
9 changes: 5 additions & 4 deletions text_extensions_for_pandas/resources/span_array.css
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@
--fallback-font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, 'Open Sans', 'Helvetica Neue', sans-serif
}

body[data-jp-theme-light="false"] .span-array {
/* These two CSS blocks set variable overrides for JupyterLab's default themes */
body[data-jp-theme-name="JupyterLab Dark"] .span-array {
--thead-background-color: #144552;
--thead-text-color: whitesmoke;
--tbody-background-color-1: #0B525B;
Expand All @@ -30,7 +31,7 @@ body[data-jp-theme-light="false"] .span-array {
--paragraph-border-color: #1b1b1b;
}

body[data-jp-theme-light="true"] .span-array {
body[data-jp-theme-name="JupyterLab Light"] .span-array {
--thead-background-color: #0d2025;
--thead-text-color: whitesmoke;
--tbody-background-color-1: #f0f0f0;
Expand All @@ -41,7 +42,7 @@ body[data-jp-theme-light="true"] .span-array {
--paragraph-border-color: #f1f1f1;
}

/* Table */
/* Table of span offsets */
.span-array>table {
table-layout: auto;
border-radius: 1em 1em 0 0;
Expand Down Expand Up @@ -83,7 +84,7 @@ body[data-jp-theme-light="true"] .span-array {
text-align: left;
}

/* Paragraph */
/* Styling for spans within document context */
.span-array>p {
border:1px solid var(--paragraph-border-color);
border-radius: 0.2em;
Expand Down
37 changes: 19 additions & 18 deletions text_extensions_for_pandas/resources/span_array.js
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
// Increment the version to invalidate the cached script
const VERSION = 0.5
const VERSION = 0.58

if(!window.SpanArray || window.SpanArray.VERSION < VERSION) {

Expand All @@ -21,10 +21,12 @@ if(!window.SpanArray || window.SpanArray.VERSION < VERSION) {
out = out.replace("&", "&amp;")
out = out.replace("<", "&lt;")
out = out.replace(">", "&gt;")
out = out.replace("$", "&#36;")
out = out.replace("\"", "&quot;")
return out;
}

/** Models an instance of a Span rendered within the offset table and document context. */
class Entry {

// Creates an ordered list of entries from a list of spans with struct [begin, end]
Expand All @@ -48,15 +50,13 @@ if(!window.SpanArray || window.SpanArray.VERSION < VERSION) {

let set;
for(let i = 0; i < entries.length; i++) {
for(let j = i+1; j < entries.length && entries[j].begin <= entries[i].end; j++) {
if(entries[j].begin < entries[i].end) {
if(entries[j].end <= entries[i].end) {
set = {type: TYPE_NESTED, entry: entries[j]}
} else {
set = {type: TYPE_OVERLAP, entry: entries[j]}
}
entries[i].sets.push(set)
for(let j = i+1; j < entries.length && entries[j].begin < entries[i].end; j++) {
if(entries[j].end <= entries[i].end) {
set = {type: TYPE_NESTED, entry: entries[j]}
} else {
set = {type: TYPE_OVERLAP, entry: entries[j]}
}
entries[i].sets.push(set)
}
}

Expand Down Expand Up @@ -117,7 +117,7 @@ if(!window.SpanArray || window.SpanArray.VERSION < VERSION) {
window.SpanArray.Entry = Entry

// Render DOM
function render(doc_text, entries, instance_id, show_offsets) {
function render(doc_text, entries, show_offsets, script_context) {

let frag = document.createDocumentFragment()

Expand All @@ -142,11 +142,13 @@ if(!window.SpanArray || window.SpanArray.VERSION < VERSION) {
row.classList.add("disabled")
}

// Adds the span entry to the table. The span's text is sanitized by replacing reserved
// HTML characters with their entity representations
row.innerHTML += `
<td><b>${entry.id.toString()}</b></td>
<td>${entry.begin}</td>
<td>${entry.end}</td>
<td>${doc_text.substring(entry.begin, entry.end)}</td>`
<td>${sanitize(doc_text.substring(entry.begin, entry.end))}</td>`

tbody.appendChild(row)
})
Expand Down Expand Up @@ -183,6 +185,7 @@ if(!window.SpanArray || window.SpanArray.VERSION < VERSION) {
paragraph.innerHTML += sanitize(doc_text.substring(begin, region.begin))

let mark = document.createElement("mark")
// The data-ids attribute is a comma-separated list of reference IDs for the matching Spans
mark.setAttribute("data-ids", "");
if (region.type != TYPE_NESTED) {
region.ids.forEach(id => {
Expand All @@ -194,7 +197,7 @@ if(!window.SpanArray || window.SpanArray.VERSION < VERSION) {
let nested_begin = region.begin
region.ids.slice(1).forEach(nested_id => {
let nested_region = entries.find(entry => entry.id == nested_id)
mark.innerHTML += doc_text.substring(nested_begin, nested_region.begin)
mark.innerHTML += sanitize(doc_text.substring(nested_begin, nested_region.begin))
let nested_mark = document.createElement("mark")
nested_mark.setAttribute("data-ids", `${nested_id},`)
nested_mark.textContent = doc_text.substring(nested_region.begin, nested_region.end)
Expand All @@ -221,12 +224,10 @@ if(!window.SpanArray || window.SpanArray.VERSION < VERSION) {
frag.appendChild(paragraph)

// Attach fragments to all copies of the instance
let containers = document.querySelectorAll(`.span-array[data-instance='${instance_id}']`)
containers.forEach(container => {
let cloned_frag = frag.cloneNode(true)
container.innerHTML = ""
container.appendChild(cloned_frag)
})
let context = script_context.parentElement
let container = context.querySelector(".span-array")
container.innerHTML = ""
container.appendChild(frag);
}

window.SpanArray.render = render
Expand Down
18 changes: 8 additions & 10 deletions text_extensions_for_pandas/test_jupyter.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,14 +41,13 @@ def test_pretty_print_html(self):
self.assertEqual(
suffix,
"""\
ndow.SpanArray.render = render
}
const Entry = window.SpanArray.Entry
ray.Entry
const render = window.SpanArray.render
const spanArray = [[0,4],[4,6],[7,10],[11,12],[13,14],[14,17],[18,19],[20,26]]
const entries = Entry.fromSpanArray(spanArray)
const doc_text = `Item's for < $100 & change`
render(doc_text, entries, 1, true)
const doc_text = 'Item\\'s for < $100 & change'
const script_context = document.currentScript
render(doc_text, entries, true, script_context)
}
</script>
""")
Expand All @@ -59,14 +58,13 @@ def test_pretty_print_html(self):
self.assertEqual(
suffix,
"""\
dow.SpanArray.render = render
}
const Entry = window.SpanArray.Entry
ay.Entry
const render = window.SpanArray.render
const spanArray = [[0,4],[4,6],[7,10],[11,12],[13,14],[14,17],[18,19],[20,26]]
const entries = Entry.fromSpanArray(spanArray)
const doc_text = `Item's for < $100 & change`
render(doc_text, entries, 2, false)
const doc_text = 'Item\\'s for < $100 & change'
const script_context = document.currentScript
render(doc_text, entries, false, script_context)
}
</script>
""")

0 comments on commit 5b5bf75

Please sign in to comment.