Skip to content

Instantly share code, notes, and snippets.

@willtim
Created February 11, 2011 18:15
Show Gist options
  • Save willtim/822769 to your computer and use it in GitHub Desktop.
Save willtim/822769 to your computer and use it in GitHub Desktop.
Simple Clojure API for VTD-XML - much faster than clojure.contrib.zip-filter.xml
(ns willtim.clj-vtd-xml
(:import [com.ximpleware VTDGen VTDNav AutoPilot])
(:require
[clojure.contrib.duck-streams :as ds]))
;;
;; Clojure API for VTD-XML
;;
;; Designed to work like clojure.contrib.zip-filter.xml, e.g.
;;
;; (for [book (vtd-> doc :book)
;; report (vtd-> book :trade :report)]
;; (vtd-content (vtd1-> report :reportName)))
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Forward declarations: vtd-> and vtd1-> are defined before vtd-xpath,
;; which they (directly or indirectly) call.
(declare vtd-> vtd1-> vtd-xpath vtd-content)
(defn parse-gzip-stream
  "Parses a GZIP-compressed XML input stream into a VTDNav.

  `in` is wrapped in a BufferedInputStream and then a GZIPInputStream. The
  decompressed bytes are read fully into a byte array, and the wrapping
  stream is closed (via with-open) before parsing begins. The bytes are
  handed to a VTDGen, parsed with `(.parse false)` (per the VTD-XML API the
  boolean toggles namespace awareness), and the generator's navigator is
  returned."
  [in]
  (let [ba (with-open [s (-> in
                             (java.io.BufferedInputStream.)
                             (java.util.zip.GZIPInputStream.))]
             (ds/to-byte-array s))
        vg (doto (VTDGen.)
             (.setDoc ba)
             (.parse false))]
    (.getNav vg)))
(defn vtd->
  "Selects nodes relative to the navigator `vn` by a path of element names.

  The names of `keywords` are joined with \"/\" into an XPath expression,
  e.g. (vtd-> book :trade :report) evaluates \"trade/report\". When the
  navigator's current depth is 0 the expression is prefixed with \"//\".
  Returns the result of vtd-xpath for that expression."
  [vn & keywords]
  (let [at-root? (zero? (.getCurrentDepth vn))
        path     (->> keywords
                      (map name)
                      (interpose "/")
                      (apply str))]
    (vtd-xpath vn (if at-root?
                    (str "//" path)
                    path))))
(defn vtd1->
  "Like vtd->, but returns only the first matching navigator, or nil when
  nothing matches."
  [vn & keywords]
  ;; `first` already yields nil for an empty collection.
  (first (apply vtd-> vn keywords)))
(defn vtd-xpath
  "Evaluates the XPath expression `xpath` against the navigator `vn`.

  Works on a clone of `vn`, so the caller's navigator is not moved. Returns
  a sequence of navigators, one per match, each a clone of the working
  navigator taken right after the match was found. The first match is
  evaluated immediately; the remaining matches are produced lazily."
  [vn xpath]
  (let [cursor (.cloneNav vn)
        pilot  (doto (AutoPilot. cursor)
                 (.selectXPath xpath))
        ;; Each call advances the pilot by one match; -1 signals the end.
        matches (fn matches []
                  (if (= -1 (.evalXPath pilot))
                    []
                    (cons (.cloneNav cursor)
                          (lazy-seq (matches)))))]
    (matches)))
(defn vtd-content
  "Returns the normalized text content of the element `vn` points at.

  Returns \"\" when `vn` is nil, and also when the current element has no
  text token (VTDNav.getText reports this as -1), so an empty element such
  as <a/> yields an empty string instead of an invalid token index being
  passed to toNormalizedString."
  [vn]
  (if (nil? vn)
    ""
    (let [text-idx (.getText vn)]
      (if (neg? text-idx)
        ""
        (.toNormalizedString vn text-idx)))))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment