Skip to content

Commit

Permalink
Build relationship indices after import complete
Browse files Browse the repository at this point in the history
Fixes #42: we cannot create indices at the time of import, because at that point we do not have enough information to correctly create or delete relationship indices when more than one relationship can reference the same source-target-type-modifier tuple. This means we must perform two passes: first import the relationship data, and then (re-)build the parent and child indices from the relationships stored. As index creation takes only seconds, we can run it automatically after import, although this means that if the user imports from more than one distribution, the indices will be destroyed and then rebuilt.
  • Loading branch information
wardle committed Nov 7, 2022
1 parent 8a18b8a commit fd90ded
Show file tree
Hide file tree
Showing 4 changed files with 75 additions and 21 deletions.
11 changes: 8 additions & 3 deletions src/com/eldrix/hermes/core.clj
Original file line number Diff line number Diff line change
Expand Up @@ -723,22 +723,27 @@
"Import SNOMED distribution files from the directories `dirs` specified into
the database directory `root` specified. Import is performed in two phases
for each directory - firstly core components and essential metadata, and
secondly non-core and extension files."
secondly non-core and extension files. Finally, store indices are re-built"
[root dirs]
(let [manifest (open-manifest root true)
store-filename (get-absolute-filename root (:store manifest))]
(doseq [dir dirs]
(log-metadata dir)
(let [files (importer/importable-files dir)]
(do-import-snomed store-filename (->> files (filter #(core-components (:component %)))))
(do-import-snomed store-filename (->> files (remove #(core-components (:component %)))))))))
(do-import-snomed store-filename (->> files (remove #(core-components (:component %)))))
(with-open [st (store/open-store store-filename {:read-only? false})]
(log/info "Rebuilding store indices...")
(store/build-indices st)
(log/info "Rebuilding store indices... completed"))))))

(defn compact
  "Compact the store belonging to the database directory `root`.

  The store filename is resolved from the manifest found under `root`; the
  store is opened writable, passed to `store/compact` and closed again.
  Progress is logged before and after the operation."
  [root]
  (let [manifest (open-manifest root false)]
    (log/info "Compacting database at " root "...")
    ;; resolve the store path only after the first log line, then make sure
    ;; the store is closed before reporting completion
    (let [store-filename (get-absolute-filename root (:store manifest))]
      (with-open [st (store/open-store store-filename {:read-only? false})]
        (store/compact st)))
    (log/info "Compacting database... complete.")))

(defn build-search-indices
([root] (build-search-indices root (.toLanguageTag (Locale/getDefault))))
Expand Down Expand Up @@ -797,7 +802,7 @@
(s/valid? :info.snomed/Concept (get-concept svc 24700007))

(tap> (get-concept svc 24700007))
(tap> (get-extended-concept svc 24700007))
(tap> (get-extended-concept svc 205631000000104))
(get-extended-concept svc 24700007)
(search svc {:s "mult scl"})
(tap> (search svc {:s "mult scl"}))
Expand Down
56 changes: 38 additions & 18 deletions src/com/eldrix/hermes/impl/lmdb.clj
Original file line number Diff line number Diff line change
Expand Up @@ -45,11 +45,11 @@
^Dbi conceptParentRelationships ;; sourceId - typeId - group - destinationId
^Dbi conceptChildRelationships ;; destinationId - typeId - group - sourceId
^Dbi componentRefsets ;; referencedComponentId - refsetId - msb - lsb
^Dbi associations ;; targetComponentId - refsetId - referencedComponentId - msb - lsb
^Dbi associations ;; targetComponentId - refsetId - referencedComponentId - msb - lsb
;;;; refset env
^Env refsetsEnv
^Dbi refsetItems ;; refset-item-id = refset-item
^Dbi refsetFieldNames] ;; refset-id = field-names]
^Dbi refsetFieldNames] ;; refset-id = field-names]
Closeable
(close [_]
(.close ^Env coreEnv)
Expand Down Expand Up @@ -187,29 +187,49 @@
[^LmdbStore store relationships]
(with-open [txn (.txnWrite ^Env (.-coreEnv store))]
(let [db ^Dbi (.-relationships store)
parent-idx ^Dbi (.-conceptParentRelationships store)
child-idx ^Dbi (.-conceptChildRelationships store)
kb (.directBuffer (PooledByteBufAllocator/DEFAULT) 8) ;; relationship id
vb (.directBuffer (PooledByteBufAllocator/DEFAULT) 64) ;; relationship entity
parent-idx-key (.directBuffer (PooledByteBufAllocator/DEFAULT) 32) ;; sourceId -- typeId -- group -- destinationId
child-idx-key (.directBuffer (PooledByteBufAllocator/DEFAULT) 32) ;; destinationId -- typeId -- group -- sourceId
idx-val (.directBuffer (PooledByteBufAllocator/DEFAULT) 0)]
vb (.directBuffer (PooledByteBufAllocator/DEFAULT) 64)] ;; relationship entity
(try (doseq [^Relationship relationship relationships]
(doto kb .clear (.writeLong (.-id relationship)))
(doto parent-idx-key .clear (.writeLong (.-sourceId relationship)) (.writeLong (.-typeId relationship)) (.writeLong (.-relationshipGroup relationship)) (.writeLong (.-destinationId relationship)))
(doto child-idx-key .clear (.writeLong (.-destinationId relationship)) (.writeLong (.-typeId relationship)) (.writeLong (.-relationshipGroup relationship)) (.writeLong (.-sourceId relationship)))
(when (should-write-object? db txn kb 8 (.-effectiveTime relationship)) ;; skip a 8 byte key (relationship-id)
(.clear vb)
(ser/write-relationship vb relationship)
(.put db txn kb vb put-flags)
(if (.-active relationship)
(do (.put parent-idx txn parent-idx-key idx-val put-flags)
(.put child-idx txn child-idx-key idx-val put-flags))
(do (.delete parent-idx txn parent-idx-key) ;; if its inactive, we're careful to delete any existing indices
(.delete child-idx txn child-idx-key))))) ;; so that update-in-place does work

(.put db txn kb vb put-flags)))
(.commit txn)
(finally (.release kb) (.release vb) (.release parent-idx-key) (.release child-idx-key) (.release idx-val))))))
(finally (.release kb) (.release vb))))))

(defn drop-relationships-index
  "Drop the concept parent and concept child relationship indices of `store`.

  Both index databases are dropped (parent first, then child) inside a single
  write transaction on the core environment, which is then committed."
  [^LmdbStore store]
  (with-open [txn ^Txn (.txnWrite ^Env (.-coreEnv store))]
    (doseq [^Dbi idx [(.-conceptParentRelationships store)
                      (.-conceptChildRelationships store)]]
      (.drop idx txn))
    (.commit txn)))

(defn index-relationships
  "Build the parent and child relationship indices from the stored relationships.

  Walks every entry of the relationships database with a cursor and, for each
  *active* relationship, writes an empty-valued key into both indices:
  - parent index : sourceId -- typeId -- group -- destinationId
  - child index  : destinationId -- typeId -- group -- sourceId
  Inactive relationships are skipped. Existing index entries are not removed
  by this function. All index writes are committed together once the cursor
  has been exhausted."
  [^LmdbStore store]
  (with-open [write-txn ^Txn (.txnWrite ^Env (.-coreEnv store))
              read-txn ^Txn (.txnRead ^Env (.-coreEnv store))
              cursor (.openCursor ^Dbi (.-relationships store) read-txn)]
    (let [parent-idx ^Dbi (.-conceptParentRelationships store)
          child-idx ^Dbi (.-conceptChildRelationships store)
          parent-key (.directBuffer (PooledByteBufAllocator/DEFAULT) 32) ;; four longs
          child-key (.directBuffer (PooledByteBufAllocator/DEFAULT) 32)  ;; four longs
          empty-val (.directBuffer (PooledByteBufAllocator/DEFAULT) 0)]  ;; indices carry no value
      (try
        (loop [more? (.first cursor)]
          (when more?
            (let [rel ^Relationship (ser/read-relationship (.val cursor))]
              (when (.-active rel)
                (let [source-id (.-sourceId rel)
                      type-id (.-typeId rel)
                      group (.-relationshipGroup rel)
                      destination-id (.-destinationId rel)]
                  (doto parent-key
                    .clear
                    (.writeLong source-id)
                    (.writeLong type-id)
                    (.writeLong group)
                    (.writeLong destination-id))
                  (doto child-key
                    .clear
                    (.writeLong destination-id)
                    (.writeLong type-id)
                    (.writeLong group)
                    (.writeLong source-id))
                  (.put parent-idx write-txn parent-key empty-val put-flags)
                  (.put child-idx write-txn child-key empty-val put-flags))))
            ;; rewind the cursor's value buffer before advancing; without this
            ;; .next throws on the second item
            (.resetReaderIndex ^ByteBuf (.val cursor))
            (recur (.next cursor))))
        (.commit write-txn)
        (finally
          (.release parent-key)
          (.release child-key)
          (.release empty-val))))))

(defn- write-refset-headings
[^LmdbStore store ^Txn txn refset-id headings]
Expand Down
5 changes: 5 additions & 0 deletions src/com/eldrix/hermes/impl/store.clj
Original file line number Diff line number Diff line change
Expand Up @@ -472,6 +472,11 @@
(catch Exception _
(write-batch-one-by-one batch store))))


(defn build-indices
  "(Re-)build the indices of `store`.

  Delegates to `kv/drop-relationships-index` and then to
  `kv/index-relationships`, returning the result of the latter."
  [store]
  (kv/drop-relationships-index store)
  (kv/index-relationships store))

;; Multimethod taking a store, a concept and a parent id. Dispatch is on the
;; class of the `concept` argument only; the store and parent id take no part
;; in choosing the implementation.
(defmulti is-a? (fn [_store concept _parent-id] (class concept)))

(defmethod is-a? Long [store concept-id parent-id]
Expand Down
24 changes: 24 additions & 0 deletions test/com/eldrix/hermes/store_test.clj
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@
(store/write-batch {:type :info.snomed/Concept :data concepts} st)
(store/write-batch {:type :info.snomed/Description :data descriptions} st)
(store/write-batch {:type :info.snomed/Relationship :data relationships} st)
(store/build-indices st)
(testing "Concept read/write"
(is (every? true? (map #(= % (store/get-concept st (:id %))) concepts))))
(testing "Concept descriptions"
Expand All @@ -70,6 +71,29 @@
(is (every? true? (map #(store/is-a? st % (:id root-concept)) concepts)))
(is (= (set (map :id concepts)) (store/get-all-children st (:id root-concept))))))))

(comment
  ;; REPL scratch: two generated relationships sharing source, destination,
  ;; type and effective time, one inactive (rel-1) and one active (rel-2).
  (def st (store/open-store))
  (def rel-1
    (gen/generate
      (rf2/gen-relationship {:sourceId      24700007
                             :destinationId 6118003
                             :typeId        116680003
                             :active        false
                             :effectiveTime (java.time.LocalDate/of 2020 1 1)})))
  (def rel-2
    (gen/generate
      (rf2/gen-relationship {:sourceId      24700007
                             :destinationId 6118003
                             :typeId        116680003
                             :active        true
                             :effectiveTime (java.time.LocalDate/of 2020 1 1)})))
  ;; write the active relationship first, then the inactive one
  (store/write-batch {:type :info.snomed/Relationship :data [rel-2 rel-1]} st)
  ;; inspect the resulting parent relationships of the source concept
  (store/get-parent-relationships st 24700007))

(deftest write-relationships
  ;; Two different relationships may describe the same source / type / group /
  ;; destination tuple, one active and one inactive, e.g. these RF2 rows
  ;; (id effectiveTime active moduleId sourceId destinationId relationshipGroup typeId characteristicTypeId modifierId):
  ;; 3229461000000123 20210512 1 999000011000000103 1089261000000101 213345000 0 116680003 900000000000011006 900000000000451002
  ;; 5687171000000128 20210512 0 999000011000000103 1089261000000101 213345000 0 116680003 900000000000011006 900000000000451002
  ;;
  ;; The parent and child indices are keyed on sourceId - typeId - group -
  ;; destinationId, so the relationship group is pinned to 0 (as in the rows
  ;; above). Left to the generator, the two relationships would usually get
  ;; different groups, hence different index keys, and the test could not detect
  ;; an import-order-dependent index.
  ;; NOTE(review): assumes rf2/gen-relationship honours a :relationshipGroup
  ;; override in the same way as the other keys - confirm.
  (let [common {:sourceId          1089261000000101
                :destinationId     213345000
                :typeId            116680003
                :relationshipGroup 0
                :effectiveTime     (java.time.LocalDate/of 2021 5 12)}
        r1 (gen/generate (rf2/gen-relationship (assoc common :active false)))
        r2 (gen/generate (rf2/gen-relationship (assoc common :active true)))]
    ;; import in both orders, each into a fresh store; the result must not differ
    (doseq [batch [[r1 r2] [r2 r1]]]
      (with-open [st (store/open-store)]
        (store/write-batch {:type :info.snomed/Relationship :data batch} st)
        (store/build-indices st)
        (is (= {116680003 #{213345000}} (store/get-parent-relationships st 1089261000000101))
            "Different relationships with same source, target and type identifiers should result in indices deterministically, not on basis of import order")))))


(deftest write-simple-refsets-test
(with-open [st (store/open-store)]
(let [n-concepts (rand-int 10000)
Expand Down

0 comments on commit fd90ded

Please sign in to comment.