Skip to content

Commit

Permalink
---------------------------------------
Browse files Browse the repository at this point in the history
  • Loading branch information
jimichan committed Jun 10, 2022
1 parent bbce5e1 commit 5c91be2
Show file tree
Hide file tree
Showing 6 changed files with 63 additions and 20 deletions.
6 changes: 5 additions & 1 deletion build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,17 @@ plugins {

description = "mynlp是mayabot开源的中文自然语言处理工具集"

// Build version string. Only one declaration may exist: a second `val buildVersion`
// in the same scope is a conflicting declaration, so the stale "4.1.0-beta5" entry is dropped.
val buildVersion = "4.1.0-beta8"
// Alternative version string kept for reference (disabled).
//val buildVersion = "4.0.1-local"
// NOTE(review): presumably toggles snapshot publishing — confirm against where it is read.
val snapShot = false

// Repositories shared by every project: the Huawei Cloud Maven mirror is declared
// ahead of Maven Central.
allprojects {
    repositories {
        maven { url = java.net.URI("https://repo.huaweicloud.com/repository/maven/") }
        mavenCentral()
    }
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,17 +55,23 @@ public abstract class InternalLoggerFactory {
/**
 * Chooses the default logger factory by probing logging backends in order:
 * Log4J2, then SLF4J, then Log4J, and finally java.util.logging.
 *
 * Each candidate is exercised by creating a logger for {@code name} and emitting
 * a debug line; if that throws, the next candidate is tried.
 *
 * @param name logger name used for the probe message
 * @return the first factory whose probe succeeded
 */
private static InternalLoggerFactory newDefaultFactory(String name) {
    InternalLoggerFactory f;
    try {
        f = Log4J2LoggerFactory.INSTANCE;
        f.newInstance(name).debug("Using Log4J2 as the default logging framework");
    } catch (Throwable t3) {
        // Throwable (not Exception) is caught on purpose: a backend missing from the
        // classpath typically surfaces as an Error such as NoClassDefFoundError.
        try {
            f = new Slf4JLoggerFactory(true);
            f.newInstance(name).debug("Using SLF4J as the default logging framework");
        } catch (Throwable t1) {
            try {
                f = Log4JLoggerFactory.INSTANCE;
                f.newInstance(name).debug("Using Log4J as the default logging framework");
            } catch (Throwable t2) {
                // Last resort: the probe here is not guarded, so a failure propagates.
                f = JdkLoggerFactory.INSTANCE;
                f.newInstance(name).debug("Using java.util.logging as the default logging framework");
            }
        }
    }

    return f;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,6 @@ import com.mayabot.nlp.fasttext.utils.*
import java.io.IOException
import java.nio.ByteBuffer
import java.nio.channels.FileChannel
import java.util.*
import kotlin.collections.HashMap
import kotlin.math.min
import kotlin.random.Random

Expand Down Expand Up @@ -378,11 +376,17 @@ class Dictionary(
val ntokens = buffer.readLong()
val pruneidxSize = buffer.readLong()

println("size:$size")
println("nwords:$nwords")
println("nlabels:$nlabels")
println("ntokens:$ntokens")

// word_hash_2_id = new LongIntScatterMap(size_);
val wordList = ArrayList<Entry>(size)

for (i in 0 until size) {
val e = Entry(buffer.readUTF(), buffer.readLong(), EntryType.fromValue(buffer.readUnsignedByte().toInt()))
val e =
Entry(buffer.readUTF(), buffer.readLong(), EntryType.fromValue(buffer.readUnsignedByte().toInt()))
wordList.add(e)
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@ object PinyinDistance {
* @param ba1
* @return
*/
private fun editDistanceClose2dCode(a: SimplePinyin, b: SimplePinyin): Float {
fun editDistanceClose2dCode(a: SimplePinyin, b: SimplePinyin): Float {

if (a == SimplePinyin.none || b == SimplePinyin.none) {
error("editDistanceClose2dCode not for none")
Expand All @@ -196,7 +196,7 @@ object PinyinDistance {
return res.toFloat()
}

private fun editDistanceClose2dCode(a: Pinyin, b: Pinyin): Float {
fun editDistanceClose2dCode(a: Pinyin, b: Pinyin): Float {

if (a.simple == SimplePinyin.none || b.simple == SimplePinyin.none) {
error("editDistanceClose2dCode not for none")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,11 @@ class PinyinInnerDict(private val env: MynlpEnv) {
}

/**
 * Looks up the pinyin entries stored for [char].
 *
 * The lookup table is indexed by the character's code. A code at or beyond the
 * table size returns `null` rather than indexing past the end; previously an
 * unguarded `return charPinyin[char.code]` ran first and made this check unreachable.
 *
 * @param char the character to look up
 * @return the table entry for [char], or `null` when the code is outside the table
 */
fun charPinyin(char: Char): Array<SimplePinyin>? {
    val code = char.code
    if (code >= charPinyin.size) {
        return null
    }
    return charPinyin[code]
}

private fun load(): Map<String, Array<Pinyin>> {
Expand Down
35 changes: 30 additions & 5 deletions mynlp/src/test/java/com/mayabot/nlp/fasttext/TestWords.kt
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package com.mayabot.nlp.fasttext

import com.mayabot.nlp.blas.cosine
import java.io.File

fun main() {
Expand All @@ -8,11 +9,35 @@ fun main() {
//
// val fastText = FastText.loadCppModel(file)

val fastText = FastText.loadModel(File("/Users/jimichan/Downloads/wiki.fasttext"),true)
val fastText = FastText.loadModel(File("/Users/jimichan/mynlp.data/wordvec.vec"), true)

val k = fastText.nearestNeighbor("上海",5)
println("加载模型到内存完成")

println(k)
println(fastText.analogies("柏林","德国","法国",5))
// val k = fastText.nearestNeighbor("丢失",5)

}
fastText.like("", "丢失")
fastText.like("遗落", "丢失")
fastText.like("偷走", "丢失")
fastText.like("遗失", "丢失")
fastText.like("遗失", "遗落")
fastText.like("失去", "丢失")
fastText.like("上海", "丢失")
fastText.like("挂失", "补办")

println("----------------")
fastText.senLike("卡 丢失 了", "卡 被 偷走 了")
fastText.senLike("卡 丢失 了", "信用卡 忘记 密码 ")

// println(fastText.analogies("柏林","德国","法国",5))

}

/**
 * Prints the cosine of the word vectors of [word1] and [word2],
 * formatted as `word1 <-> word2 : value`.
 */
private fun FastText.like(word1: String, word2: String) {
    val firstVector = getWordVector(word1)
    val secondVector = getWordVector(word2)
    val similarity = cosine(firstVector, secondVector)
    println("$word1 <-> $word2 : $similarity")
}

/**
 * Prints the cosine of the sentence vectors of two space-separated sentences,
 * formatted as `word1 <-> word2 : value`.
 *
 * Each argument is split on a single space exactly as given, so leading or
 * trailing spaces produce empty tokens that are passed through unchanged.
 */
private fun FastText.senLike(word1: String, word2: String) {
    val firstVector = getSentenceVector(word1.split(" "))
    val secondVector = getSentenceVector(word2.split(" "))
    val similarity = cosine(firstVector, secondVector)
    println("$word1 <-> $word2 : $similarity")
}

0 comments on commit 5c91be2

Please sign in to comment.