Added fixes noted in PR #205 comments.

CODAIT · Jun 21, 2021 · 5b5bf75 · 5b5bf75
1 parent fa06f53
commit 5b5bf75
Show file tree

Hide file tree

Showing 5 changed files with 45 additions and 47 deletions.
diff --git a/requirements.txt b/requirements.txt
@@ -2,6 +2,7 @@ numpy>=1.17
 pandas>=1.0.3
 pyarrow>=1.0.0
 regex
+# TODO: The following dependency supports compatibility with Python 3.6 and should be removed when support for that version is dropped.
 importlib_resources
 # TODO: The following dependency should go away when we switch to Python 3.8.
 memoized-property

diff --git a/text_extensions_for_pandas/jupyter.py b/text_extensions_for_pandas/jupyter.py
@@ -30,8 +30,9 @@
 import numpy as np
 import time
 from typing import *
-import text_extensions_for_pandas.resources as resources
+import text_extensions_for_pandas.resources
 
+# TODO: This try/except block is for Python 3.6 support, and should be reduced to just importing importlib.resources when 3.6 support is dropped.
 try:
  import importlib.resources as pkg_resources
 except ImportError:
@@ -87,14 +88,12 @@ def _get_sanitized_doctext(column: Union["SpanArray", "TokenSpanArray"]) -> List
 
  text_pieces = []
  for i in range(len(text)):
- if text[i] == "`":
- text_pieces.append("\\`")
+ if text[i] == "'":
+ text_pieces.append("\\'")
  else:
  text_pieces.append(text[i])
  return "".join(text_pieces)
 
-_spanarray_instance_counter = 0
-
 def pretty_print_html(column: Union["SpanArray", "TokenSpanArray"],
  show_offsets: bool) -> str:
  """
@@ -112,9 +111,6 @@ def pretty_print_html(column: Union["SpanArray", "TokenSpanArray"],
  raise TypeError(f"Expected SpanArray or TokenSpanArray, but received "
  f"{column} of type {type(column)}")
 
- global _spanarray_instance_counter 
- _spanarray_instance_counter += 1
-
  # Get a javascript representation of the column
  span_array = []
  for e in column:
@@ -124,25 +120,26 @@ def pretty_print_html(column: Union["SpanArray", "TokenSpanArray"],
  style_text = ""
  script_text = ""
 
- style_text = pkg_resources.read_text(resources, "span_array.css")
- script_text = pkg_resources.read_text(resources, "span_array.js")
+ style_text = pkg_resources.read_text(text_extensions_for_pandas.resources, "span_array.css")
+ script_text = pkg_resources.read_text(text_extensions_for_pandas.resources, "span_array.js")
 
  return textwrap.dedent(f"""
- <div class="span-array" data-instance="{_spanarray_instance_counter}">
+ <div class="span-array">
  If you're reading this message, your notebook viewer does not support Javascript execution. Try pasting the URL into a service like nbviewer.
  </div>
  <style>
- {textwrap.indent(style_text, " ")}
+ {textwrap.indent(style_text, ' ')}
  </style>
  <script>
  {{
- {textwrap.indent(script_text, " ")}
+ {textwrap.indent(script_text, ' ')}
  const Entry = window.SpanArray.Entry
  const render = window.SpanArray.render
  const spanArray = [{','.join(span_array)}]
  const entries = Entry.fromSpanArray(spanArray)
- const doc_text = `{_get_sanitized_doctext(column)}`
- render(doc_text, entries, {_spanarray_instance_counter}, {'true' if show_offsets else 'false'})
+ const doc_text = '{_get_sanitized_doctext(column)}'
+ const script_context = document.currentScript
+ render(doc_text, entries, {'true' if show_offsets else 'false'}, script_context)
  }}
  </script>
  """)
diff --git a/text_extensions_for_pandas/resources/span_array.css b/text_extensions_for_pandas/resources/span_array.css
@@ -19,7 +19,8 @@
  --fallback-font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, 'Open Sans', 'Helvetica Neue', sans-serif
 }
 
-body[data-jp-theme-light="false"] .span-array {
+/* These two CSS blocks set variable overrides for JupyterLab's default themes */
+body[data-jp-theme-name="JupyterLab Dark"] .span-array {
  --thead-background-color: #144552;
  --thead-text-color: whitesmoke;
  --tbody-background-color-1: #0B525B;
@@ -30,7 +31,7 @@ body[data-jp-theme-light="false"] .span-array {
  --paragraph-border-color: #1b1b1b;
 }
 
-body[data-jp-theme-light="true"] .span-array {
+body[data-jp-theme-name="JupyterLab Light"] .span-array {
  --thead-background-color: #0d2025;
  --thead-text-color: whitesmoke;
  --tbody-background-color-1: #f0f0f0;
@@ -41,7 +42,7 @@ body[data-jp-theme-light="true"] .span-array {
  --paragraph-border-color: #f1f1f1;
 }
 
-/* Table */
+/* Table of span offsets */
 .span-array>table {
  table-layout: auto;
  border-radius: 1em 1em 0 0;
@@ -83,7 +84,7 @@ body[data-jp-theme-light="true"] .span-array {
  text-align: left;
 }
 
-/* Paragraph */
+/* Styling for spans within document context */
 .span-array>p {
  border:1px solid var(--paragraph-border-color);
  border-radius: 0.2em;

diff --git a/text_extensions_for_pandas/resources/span_array.js b/text_extensions_for_pandas/resources/span_array.js
@@ -1,5 +1,5 @@
 // Increment the version to invalidate the cached script
-const VERSION = 0.5
+const VERSION = 0.58
 
 if(!window.SpanArray || window.SpanArray.VERSION < VERSION) {
 
@@ -21,10 +21,12 @@ if(!window.SpanArray || window.SpanArray.VERSION < VERSION) {
  out = out.replace("&", "&amp;")
  out = out.replace("<", "&lt;")
  out = out.replace(">", "&gt;")
+ out = out.replace("$", "&#36;")
  out = out.replace("\"", "&quot;")
  return out;
  }
 
+ /** Models an instance of a Span rendered within the offset table and document context. */
  class Entry {
 
  // Creates an ordered list of entries from a list of spans with struct [begin, end]
@@ -48,15 +50,13 @@ if(!window.SpanArray || window.SpanArray.VERSION < VERSION) {
 
  let set;
  for(let i = 0; i < entries.length; i++) {
- for(let j = i+1; j < entries.length && entries[j].begin <= entries[i].end; j++) {
- if(entries[j].begin < entries[i].end) {
- if(entries[j].end <= entries[i].end) {
- set = {type: TYPE_NESTED, entry: entries[j]}
- } else {
- set = {type: TYPE_OVERLAP, entry: entries[j]}
- }
- entries[i].sets.push(set)
+ for(let j = i+1; j < entries.length && entries[j].begin < entries[i].end; j++) {
+ if(entries[j].end <= entries[i].end) {
+ set = {type: TYPE_NESTED, entry: entries[j]}
+ } else {
+ set = {type: TYPE_OVERLAP, entry: entries[j]}
  }
+ entries[i].sets.push(set)
  }
  }
 
@@ -117,7 +117,7 @@ if(!window.SpanArray || window.SpanArray.VERSION < VERSION) {
  window.SpanArray.Entry = Entry
 
  // Render DOM
- function render(doc_text, entries, instance_id, show_offsets) {
+ function render(doc_text, entries, show_offsets, script_context) {
 
  let frag = document.createDocumentFragment()
 
@@ -142,11 +142,13 @@ if(!window.SpanArray || window.SpanArray.VERSION < VERSION) {
  row.classList.add("disabled")
  }
 
+ // Adds the span entry to the table. The span's text is sanitized by replacing reserved
+ // symbols with their HTML entity representations.
  row.innerHTML += `
  <td><b>${entry.id.toString()}</b></td>
  <td>${entry.begin}</td>
  <td>${entry.end}</td>
- <td>${doc_text.substring(entry.begin, entry.end)}</td>`
+ <td>${sanitize(doc_text.substring(entry.begin, entry.end))}</td>`
 
  tbody.appendChild(row)
  })
@@ -183,6 +185,7 @@ if(!window.SpanArray || window.SpanArray.VERSION < VERSION) {
  paragraph.innerHTML += sanitize(doc_text.substring(begin, region.begin))
 
  let mark = document.createElement("mark")
+ // The data-ids attribute is a comma-separated list of the IDs of the matching spans
  mark.setAttribute("data-ids", "");
  if (region.type != TYPE_NESTED) {
  region.ids.forEach(id => {
@@ -194,7 +197,7 @@ if(!window.SpanArray || window.SpanArray.VERSION < VERSION) {
  let nested_begin = region.begin
  region.ids.slice(1).forEach(nested_id => {
  let nested_region = entries.find(entry => entry.id == nested_id)
- mark.innerHTML += doc_text.substring(nested_begin, nested_region.begin)
+ mark.innerHTML += sanitize(doc_text.substring(nested_begin, nested_region.begin))
  let nested_mark = document.createElement("mark")
  nested_mark.setAttribute("data-ids", `${nested_id},`)
  nested_mark.textContent = doc_text.substring(nested_region.begin, nested_region.end)
@@ -221,12 +224,10 @@ if(!window.SpanArray || window.SpanArray.VERSION < VERSION) {
  frag.appendChild(paragraph)
 
  // Attach the rendered fragment to the .span-array container found under the calling script's parent element
- let containers = document.querySelectorAll(`.span-array[data-instance='${instance_id}']`)
- containers.forEach(container => {
- let cloned_frag = frag.cloneNode(true)
- container.innerHTML = ""
- container.appendChild(cloned_frag)
- })
+ let context = script_context.parentElement
+ let container = context.querySelector(".span-array")
+ container.innerHTML = ""
+ container.appendChild(frag);
  }
 
  window.SpanArray.render = render

diff --git a/text_extensions_for_pandas/test_jupyter.py b/text_extensions_for_pandas/test_jupyter.py
@@ -41,14 +41,13 @@ def test_pretty_print_html(self):
  self.assertEqual(
  suffix,
  """\
-ndow.SpanArray.render = render
-}
- const Entry = window.SpanArray.Entry
+ray.Entry
  const render = window.SpanArray.render
  const spanArray = [[0,4],[4,6],[7,10],[11,12],[13,14],[14,17],[18,19],[20,26]]
  const entries = Entry.fromSpanArray(spanArray)
- const doc_text = `Item's for < $100 & change`
- render(doc_text, entries, 1, true)
+ const doc_text = 'Item\\'s for < $100 & change'
+ const script_context = document.currentScript
+ render(doc_text, entries, true, script_context)
  }
 </script>
 """)
@@ -59,14 +58,13 @@ def test_pretty_print_html(self):
  self.assertEqual(
  suffix,
  """\
-dow.SpanArray.render = render
-}
- const Entry = window.SpanArray.Entry
+ay.Entry
  const render = window.SpanArray.render
  const spanArray = [[0,4],[4,6],[7,10],[11,12],[13,14],[14,17],[18,19],[20,26]]
  const entries = Entry.fromSpanArray(spanArray)
- const doc_text = `Item's for < $100 & change`
- render(doc_text, entries, 2, false)
+ const doc_text = 'Item\\'s for < $100 & change'
+ const script_context = document.currentScript
+ render(doc_text, entries, false, script_context)
  }
 </script>
 """)