From 98d1c18da2b0a3ba49fe31d7956618262da68393 Mon Sep 17 00:00:00 2001 From: KonradStaniec Date: Thu, 2 Jun 2022 08:01:48 +0200 Subject: [PATCH 1/3] Use queries with custom xor function --- fluffy/content_db.nim | 161 ++++++++++++------------------- fluffy/tests/test_content_db.nim | 53 ---------- vendor/nim-eth | 2 +- 3 files changed, 65 insertions(+), 151 deletions(-) diff --git a/fluffy/content_db.nim b/fluffy/content_db.nim index 95df07440..3a27b22b0 100644 --- a/fluffy/content_db.nim +++ b/fluffy/content_db.nim @@ -29,16 +29,11 @@ export kvstore_sqlite3 # 3. Or databases are created per network (and kvstores pre content type) and # thus depending on the network the right db needs to be selected. -const - # Maximal number of ObjInfo objects held in memory per database scan. 100k - # objects should result in memory usage of around 7mb which should be - # appropriate for even low resource devices - maxObjPerScan = 100000 - type RowInfo = tuple contentId: array[32, byte] payloadLength: int64 + distance: array[32, byte] ObjInfo* = object contentId*: array[32, byte] @@ -51,7 +46,8 @@ type sizeStmt: SqliteStmt[NoParams, int64] unusedSizeStmt: SqliteStmt[NoParams, int64] vacStmt: SqliteStmt[NoParams, void] - getAll: SqliteStmt[NoParams, RowInfo] + contentSizeStmt: SqliteStmt[NoParams, int64] + getAllOrderedByDistanceStmt: SqliteStmt[array[32, byte], RowInfo] PutResultType* = enum ContentStored, DbPruned @@ -65,9 +61,21 @@ type fractionOfDeletedContent*: float64 numOfDeletedElements*: int64 -# Objects must be sorted from largest to closest distance -proc `<`(a, b: ObjInfo): bool = - return a.distFrom < b.distFrom +func xorDistance( + a: openArray[byte], + b: openArray[byte] +): Result[seq[byte], cstring] {.cdecl.} = + var s: seq[byte] = newSeq[byte](32) + + if len(a) != 32 or len(b) != 32: + return err("Blobs should have 32 byte length") + + var i = 0 + while i < 32: + s[i] = a[i] xor b[i] + inc i + + return ok(s) template expectDb(x: auto): untyped = # There's no 
meaningful error handling implemented for a corrupt database or @@ -82,6 +90,9 @@ proc new*(T: type ContentDB, path: string, maxSize: uint32, inMemory = false): C else: SqStoreRef.init(path, "fluffy").expectDb() + db.registerCustomScalarFunction("xorDistance", xorDistance) + .expect("Couldn't register custom xor function") + let getSizeStmt = db.prepareStmt( "SELECT page_count * page_size as size FROM pragma_page_count(), pragma_page_size();", NoParams, int64).get() @@ -96,11 +107,14 @@ proc new*(T: type ContentDB, path: string, maxSize: uint32, inMemory = false): C let kvStore = kvStore db.openKvStore().expectDb() - # This needs to go after `openKvStore`, as it checks whether the table name - # kvstore already exists. - let getKeysStmt = db.prepareStmt( - "SELECT key, length(value) FROM kvstore", - NoParams, RowInfo + let contentSizeStmt = db.prepareStmt( + "SELECT SUM(length(value)) FROM kvstore", + NoParams, int64 + ).get() + + let getAllOrderedByDistanceStmt = db.prepareStmt( + "SELECT key, length(value), xorDistance(?, key) as distance FROM kvstore ORDER BY distance DESC", + array[32, byte], RowInfo ).get() ContentDB( @@ -108,59 +122,11 @@ proc new*(T: type ContentDB, path: string, maxSize: uint32, inMemory = false): C maxSize: maxSize, sizeStmt: getSizeStmt, vacStmt: vacStmt, - getAll: getKeysStmt, - unusedSizeStmt: unusedSize + unusedSizeStmt: unusedSize, + contentSizeStmt: contentSizeStmt, + getAllOrderedByDistanceStmt: getAllOrderedByDistanceStmt ) -proc getNFurthestElements*( - db: ContentDB, target: UInt256, n: uint64): (seq[ObjInfo], int64) = - ## Get at most n furthest elements from db in order from furthest to closest. - ## Payload lengths are also returned so the caller can decide how many of - ## those elements need to be deleted. - ## - ## Currently it uses xor metric - ## - ## Currently works by querying for all elements in database and doing all - ## necessary work on program level. 
This is mainly due to two facts: - ## - sqlite does not have build xor function, also it does not handle bitwise - ## operations on blobs as expected - ## - our nim wrapper for sqlite does not support create_function api of sqlite - ## so we cannot create custom function comparing blobs at sql level. If that - ## would be possible we may be able to all this work by one sql query - - if n == 0: - return (newSeq[ObjInfo](), 0'i64) - - var heap = initHeapQueue[ObjInfo]() - var totalContentSize: int64 = 0 - - var ri: RowInfo - for e in db.getAll.exec(ri): - let contentId = UInt256.fromBytesBE(ri.contentId) - # TODO: Currently it assumes xor distance, but when we start testing - # networks with other distance functions this needs to be adjusted to the - # custom distance function - let dist = contentId xor target - let obj = ObjInfo( - contentId: ri.contentId, payloadLength: ri.payloadLength, distFrom: dist) - - if (uint64(len(heap)) < n): - heap.push(obj) - else: - if obj > heap[0]: - discard heap.replace(obj) - - totalContentSize = totalContentSize + ri.payloadLength - - var res: seq[ObjInfo] = newSeq[ObjInfo](heap.len()) - - var i = heap.len() - 1 - while heap.len() > 0: - res[i] = heap.pop() - dec i - - return (res, totalContentSize) - proc reclaimSpace*(db: ContentDB): void = ## Runs sqlite VACUUM commands which rebuilds the db, repacking it into a ## minimal amount of disk space. 
@@ -195,6 +161,13 @@ proc unusedSize(db: ContentDB): int64 = proc realSize*(db: ContentDB): int64 = db.size() - db.unusedSize() +proc contentSize(db: ContentDB): int64 = + ## Returns total size of content stored in DB + var size: int64 = 0 + discard (db.contentSizeStmt.exec do(res: int64): + size = res).expectDb() + return size + proc get*(db: ContentDB, key: openArray[byte]): Option[seq[byte]] = var res: Option[seq[byte]] proc onData(data: openArray[byte]) = res = some(@data) @@ -233,41 +206,35 @@ proc contains*(db: ContentDB, key: ContentId): bool = proc del*(db: ContentDB, key: ContentId) = db.del(key.toByteArrayBE()) -proc deleteFractionOfContent*( - db: ContentDB, - target: Uint256, - targetFraction: float64): (UInt256, int64, int64, int64) = - ## Procedure which tries to delete fraction of database by scanning maxObjPerScan - ## furthest elements. - ## If the maxObjPerScan furthest elements, is not enough to attain required fraction - ## procedure deletes all but one element and report how many bytes have been - ## deleted - ## Procedure do not call reclaim space, it is left to the caller. 
- - let (furthestElements, totalContentSize) = db.getNFurthestElements(target, maxObjPerScan) - var bytesDeleted: int64 = 0 - let bytesToDelete = int64(targetFraction * float64(totalContentSize)) - let numOfElements = len(furthestElements) - var numOfDeletedElements: int64 = 0 - - if numOfElements == 0: - # no elements in database, return some zero value - return (UInt256.zero, 0'i64, 0'i64, 0'i64) +proc deleteContentFraction( + db: ContentDB, + target: UInt256, + fraction: float64): (UInt256, int64, int64, int64) = - let lastIdx = len(furthestElements) - 1 + doAssert( + fraction > 0 and fraction < 1, + "Deleted fraction shohould be > 0 and < 1" + ) - for i, elem in furthestElements: - if i == lastIdx: - # this is our last element, do not delete it and report it as last non deleted - # element - return (elem.distFrom, bytesDeleted, totalContentSize, numOfDeletedElements) + let totalContentSize = db.contentSize() + let bytesToDelete = int64(fraction * float64(totalContentSize)) + var numOfDeletedElements: int64 = 0 - if bytesDeleted + elem.payloadLength < bytesToDelete: - db.del(elem.contentId) - bytesDeleted = bytesDeleted + elem.payloadLength + var ri: RowInfo + var bytesDeleted: int64 = 0 + let targetBytes = target.toByteArrayBE() + for e in db.getAllOrderedByDistanceStmt.exec(targetBytes, ri): + if bytesDeleted + ri.payloadLength < bytesToDelete: + db.del(ri.contentId) + bytesDeleted = bytesDeleted + ri.payloadLength inc numOfDeletedElements else: - return (elem.distFrom, bytesDeleted, totalContentSize, numOfDeletedElements) + return ( + UInt256.fromBytesBE(ri.contentid), + bytesDeleted, + totalContentSize, + numOfDeletedElements + ) proc put*( db: ContentDB, @@ -299,7 +266,7 @@ proc put*( totalContentSize, deletedElements ) = - db.deleteFractionOfContent(target, 0.25) + db.deleteContentFraction(target, 0.25) let deletedFraction = float64(deletedBytes) / float64(totalContentSize) diff --git a/fluffy/tests/test_content_db.nim b/fluffy/tests/test_content_db.nim 
index 600351210..12fb441d5 100644 --- a/fluffy/tests/test_content_db.nim +++ b/fluffy/tests/test_content_db.nim @@ -102,59 +102,6 @@ suite "Content Database": size6 == size1 realSize2 == size6 - type TestCase = object - keys: seq[UInt256] - n: uint64 - - proc init(T: type TestCase, keys: seq[UInt256], n: uint64): T = - TestCase(keys: keys, n: n) - - proc hasCorrectOrder(s: seq[ObjInfo], expectedOrder: seq[Uint256]): bool = - var i = 0 - for e in s: - if (e.distFrom != expectedOrder[i]): - return false - inc i - return true - - test "Get N furthest elements from db": - # we check distances from zero as num xor 0 = num, so each uint in sequence is valid - # distance - let zero = u256(0) - let testCases = @[ - TestCase.init(@[], 10), - TestCase.init(@[u256(1), u256(2)], 1), - TestCase.init(@[u256(1), u256(2)], 2), - TestCase.init(@[u256(5), u256(1), u256(2), u256(4)], 2), - TestCase.init(@[u256(5), u256(1), u256(2), u256(4)], 4), - TestCase.init(@[u256(57), u256(32), u256(108), u256(4)], 2), - TestCase.init(@[u256(57), u256(32), u256(108), u256(4)], 4), - TestCase.init(generateNRandomU256(rng[], 10), 5), - TestCase.init(generateNRandomU256(rng[], 10), 10) - ] - - for testCase in testCases: - let - db = ContentDB.new("", uint32.high, inMemory = true) - - for elem in testCase.keys: - discard db.put(elem, genByteSeq(32), testId) - - let (furthest, _) = db.getNFurthestElements(zero, testCase.n) - - var sortedKeys = testCase.keys - - sortedKeys.sort(SortOrder.Descending) - - if uint64(len(testCase.keys)) < testCase.n: - check: - len(furthest) == len(testCase.keys) - else: - check: - uint64(len(furthest)) == testCase.n - check: - furthest.hasCorrectOrder(sortedKeys) - test "ContentDB pruning": let maxDbSize = uint32(100000) diff --git a/vendor/nim-eth b/vendor/nim-eth index dffaa78cb..dacf827a8 160000 --- a/vendor/nim-eth +++ b/vendor/nim-eth @@ -1 +1 @@ -Subproject commit dffaa78cbedd47d3ee00ba1fdf2b130c47e75793 +Subproject commit dacf827a8653459429623be7ceaf6ecca20fcf35 
From 70dbe324b35340a381df95d082fb3f507acaf994 Mon Sep 17 00:00:00 2001 From: KonradStaniec Date: Thu, 2 Jun 2022 15:18:48 +0200 Subject: [PATCH 2/3] Minor fix --- fluffy/content_db.nim | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fluffy/content_db.nim b/fluffy/content_db.nim index 3a27b22b0..743aa3b7e 100644 --- a/fluffy/content_db.nim +++ b/fluffy/content_db.nim @@ -230,7 +230,7 @@ proc deleteContentFraction( inc numOfDeletedElements else: return ( - UInt256.fromBytesBE(ri.contentid), + UInt256.fromBytesBE(ri.distance), bytesDeleted, totalContentSize, numOfDeletedElements From 3927c7338e70b43b5b4b16bda5ada8f9b54942c2 Mon Sep 17 00:00:00 2001 From: KonradStaniec Date: Fri, 3 Jun 2022 11:46:24 +0200 Subject: [PATCH 3/3] Pr comments --- fluffy/content_db.nim | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fluffy/content_db.nim b/fluffy/content_db.nim index 743aa3b7e..cc2c3c308 100644 --- a/fluffy/content_db.nim +++ b/fluffy/content_db.nim @@ -210,10 +210,12 @@ proc deleteContentFraction( db: ContentDB, target: UInt256, fraction: float64): (UInt256, int64, int64, int64) = + ## Deletes at most `fraction` of the content from the database. + ## First, content furthest from provided `target` is deleted. doAssert( fraction > 0 and fraction < 1, - "Deleted fraction shohould be > 0 and < 1" + "Deleted fraction should be > 0 and < 1" ) let totalContentSize = db.contentSize()