-
Notifications
You must be signed in to change notification settings - Fork 206
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
75c81a1
commit 7713256
Showing
1 changed file
with
62 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,66 @@ | ||
import { read as readCFB, find, CFB$Container } from "cfb"; | ||
import { WJSDoc } from "../../types"; | ||
import { JSDOM } from "jsdom"; | ||
import { WJSDoc, WJSPara } from "../../types"; | ||
|
||
/**
 * Walk one node found under a `w:p` paragraph (ECMA-376 17.3.1.22, CT_P) and
 * append the text it carries to `root.elts`.
 *
 * `w:r`, `w:sdt` and `w:sdtContent` are treated as containers: each of their
 * child nodes is processed recursively. A `w:t` element contributes a single
 * `{t: "s", v: <text>}` entry. Nodes that are not elements are ignored.
 *
 * @param child - DOM node to inspect.
 * @param root - Paragraph accumulator; mutated in place.
 * @throws A string naming the element when its tag is none of the above.
 */
function process_para(child: Node, root: WJSPara): void {
  if (child.nodeType !== 1 /* ELEMENT_NODE */) return;

  const element = child as Element;
  switch (element.tagName) {
    case "w:r":
    case "w:sdt":
    case "w:sdtContent":
      element.childNodes.forEach((grandchild) => process_para(grandchild, root));
      break;
    case "w:t":
      // textContent is typed as nullable; store an empty string rather than null
      root.elts.push({ t: "s", v: element.textContent || "" });
      break;
    default:
      // Only elements reach this switch, so nodeType is always 1 here;
      // the tag name is what identifies the unsupported input.
      throw `DOCX paragraph unsupported ${element.tagName} element`;
  }
}
|
||
/**
 * Convert one child node of `w:body` into a paragraph record.
 *
 * @param child - Node to inspect.
 * @param root - When true, `w:tbl` and block-level custom XML elements are
 *   skipped silently; when false (the default) they are reported as
 *   unsupported.
 * @returns The collected paragraph for a `w:p` element; `undefined` for
 *   non-element nodes and for skipped elements.
 * @throws A string naming the element when it is not supported.
 */
function process_body_elt(child: ChildNode, root: boolean = false): WJSPara|void {
  if (child.nodeType !== 1 /* ELEMENT_NODE */) return;

  const element = child as Element;
  switch (element.tagName) {
    case "w:p": {
      const para: WJSPara = { elts: [] };
      element.childNodes.forEach((node) => process_para(node, para));
      return para;
    }
    case "w:tbl":
    case "w:customXml": // spelling defined by ECMA-376; XML names are case-sensitive
    case "w:customXML": // original spelling, kept so existing behavior is a subset
      if (root) return;
      break;
  }
  throw `DOCX body unsupported ${element.tagName} element`;
}
|
||
/**
 * Build a document model from the `/word/document.xml` entry of a container.
 *
 * The entry is parsed as XML with JSDOM, the `w:body` child of `w:document`
 * is located, and each body child is passed to `process_body_elt` in root
 * mode. Every paragraph it returns is appended to the result in document
 * order.
 *
 * @param file - Container to read from.
 * @returns Document whose `p` array holds the extracted paragraphs.
 * @throws Error when the entry or the `w:body` element is missing; also
 *   propagates whatever `process_body_elt` throws for unsupported elements.
 */
export function parse_cfb(file: CFB$Container): WJSDoc {
  // Get content of document.xml
  const entry = find(file, "/word/document.xml");
  if (!entry || !entry.content) throw new Error("DOCX missing /word/document.xml");

  // Normalize to a Buffer first: calling toString() directly on a plain
  // byte array would yield comma-separated numbers instead of the XML text.
  const xml = Buffer.from(entry.content as Uint8Array).toString();

  // Parse with JSDOM as XML rather than HTML
  const dom = new JSDOM(xml, { contentType: "text/xml" });

  const rootelt = dom.window.document.children[0];
  const bodyelt = rootelt ? rootelt.querySelector("w\\:document > w\\:body") : null;
  if (!bodyelt) throw new Error("DOCX document.xml has no w:body element");

  const docx: WJSDoc = { p: [] };
  bodyelt.childNodes.forEach((child) => {
    const res = process_body_elt(child, true);
    if (res) docx.p.push(res);
  });

  return docx;
}