Skip to content

Commit

Permalink
made CommonEntry
Browse files Browse the repository at this point in the history
  • Loading branch information
MyK00L committed Jun 20, 2024
1 parent b298315 commit ccfb01e
Show file tree
Hide file tree
Showing 10 changed files with 204 additions and 135 deletions.
13 changes: 13 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ edition = "2021"

[dependencies]
csv = "1.3.0"
enum_dispatch = "0.3.13"
genanki-rs = "0.4"
indexmap = {version = "2.2.6", features = ["serde"]}
lyon_path = "1"
Expand Down
31 changes: 15 additions & 16 deletions src/anim_cjk.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ pub struct GraphicsEntry {
pub strokes: Vec<String>,
medians: Vec<Vec<(f32, f32)>>,
}
impl From<GraphicsEntry> for Entry {
impl From<GraphicsEntry> for WordEntry {
fn from(o: GraphicsEntry) -> Self {
Self {
id: o.character.into(),
Expand All @@ -31,17 +31,18 @@ pub struct DictionaryEntry {
pub decomposition: String,
pub radical: String,
}
impl From<DictionaryEntry> for Entry {
impl From<DictionaryEntry> for WordEntry {
fn from(o: DictionaryEntry) -> Self {
let radicals: Vec<String> = o
let radical_deps: Vec<EntryId> = o
.decomposition
.chars()
.filter(|x| is_radical(*x))
.map(Into::<String>::into)
.map(EntryId::Word)
.collect();
Self {
id: o.character.into(),
dependencies: radicals,
dependencies: radical_deps,
..Default::default()
}
}
Expand All @@ -50,21 +51,19 @@ impl From<DictionaryEntry> for Entry {
use std::fs::File;
use std::io::{self, BufRead};

pub fn parse_graphics_zh_hans() -> Vec<GraphicsEntry> {
pub fn parse_graphics_zh_hans() -> impl Iterator<Item = CommonEntry> {
let file = File::open("res/graphicsZhHans.txt").unwrap();
let lines = io::BufReader::new(file).lines().map_while(Result::ok);
let mut ans = Vec::<GraphicsEntry>::new();
for line in lines {
ans.push(serde_json::from_str(&line).unwrap());
}
ans
lines
.map(|x| serde_json::from_str::<GraphicsEntry>(&x).unwrap())
.map(WordEntry::from)
.map(CommonEntry::from)
}
pub fn parse_dictionary_zh_hans() -> Vec<DictionaryEntry> {
pub fn parse_dictionary_zh_hans() -> impl Iterator<Item = CommonEntry> {
let file = File::open("res/dictionaryZhHans.txt").unwrap();
let lines = io::BufReader::new(file).lines().map_while(Result::ok);
let mut ans = Vec::<DictionaryEntry>::new();
for line in lines {
ans.push(serde_json::from_str(&line).unwrap());
}
ans
lines
.map(|x| serde_json::from_str::<DictionaryEntry>(&x).unwrap())
.map(WordEntry::from)
.map(CommonEntry::from)
}
6 changes: 3 additions & 3 deletions src/audio.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ pub struct AudioPath {
id: String,
path: PathBuf,
}
impl From<AudioPath> for Entry {
impl From<AudioPath> for WordEntry {
fn from(a: AudioPath) -> Self {
Self {
id: a.id,
Expand All @@ -16,7 +16,7 @@ impl From<AudioPath> for Entry {
}
}

pub fn get_word_audios() -> Vec<AudioPath> {
pub fn get_word_audios() -> impl Iterator<Item = CommonEntry> {
let mut ans = vec![];
for entry in std::fs::read_dir("res/audio-cmn/64k/hsk").unwrap() {
let path = entry.unwrap().path();
Expand All @@ -29,5 +29,5 @@ pub fn get_word_audios() -> Vec<AudioPath> {
ans.push(AudioPath { id, path });
}
}
ans
ans.into_iter().map(WordEntry::from).map(CommonEntry::from)
}
6 changes: 3 additions & 3 deletions src/cedict.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ pub struct CedictEntry {
pub pinyin: Vec<String>,
pub definitions: IndexMap<String, String>,
}
impl From<CedictEntry> for Entry {
impl From<CedictEntry> for WordEntry {
fn from(o: CedictEntry) -> Self {
Self {
id: o.simplified,
Expand All @@ -34,9 +34,9 @@ impl From<CedictEntry> for Entry {
}
}

pub fn get_cedict() -> Vec<CedictEntry> {
pub fn get_cedict() -> impl Iterator<Item = CommonEntry> {
let file = std::fs::File::open("res/all_cedict.json").unwrap();
let reader = std::io::BufReader::new(file);
let hm: IndexMap<String, CedictEntry> = serde_json::from_reader(reader).unwrap();
hm.into_values().collect()
hm.into_values().map(WordEntry::from).map(CommonEntry::from)
}
143 changes: 116 additions & 27 deletions src/common.rs
Original file line number Diff line number Diff line change
@@ -1,50 +1,63 @@
use enum_dispatch::enum_dispatch;
use ordered_float::NotNan;
use std::cmp::Ordering;

/// Runs `f`, catching any panic it raises while temporarily suppressing the
/// global panic hook so nothing is printed to stderr.
///
/// The previously installed hook is restored before returning. Note that the
/// panic hook is process-global state, so panics from *other* threads are
/// also silenced for the duration of `f` — acceptable here, but worth knowing.
fn catch_unwind_silent<F: FnOnce() -> R + std::panic::UnwindSafe, R>(
    f: F,
) -> std::thread::Result<R> {
    // Remember the hook that was installed and swap in a no-op one.
    let saved_hook = std::panic::take_hook();
    std::panic::set_hook(Box::new(|_| {}));
    // Run the closure; a panic becomes an `Err` carrying the payload.
    let outcome = std::panic::catch_unwind(f);
    // Reinstall the original hook before handing back the result.
    std::panic::set_hook(saved_hook);
    outcome
}
pub fn process_pinyin(s: &str) -> String {
let s = prettify_pinyin::prettify(s);
let parser = pinyin_parser::PinyinParser::new()
.preserve_spaces(false)
.preserve_punctuations(true)
.with_strictness(pinyin_parser::Strictness::Loose)
.preserve_miscellaneous(true);
parser
.parse(&s)
.reduce(|acc, s| acc + &s)
.unwrap_or_default()
catch_unwind_silent(|| {
let parser = pinyin_parser::PinyinParser::new()
.preserve_spaces(false)
.preserve_punctuations(true)
.with_strictness(pinyin_parser::Strictness::Loose)
.preserve_miscellaneous(true);
parser
.parse(&s)
.reduce(|acc, s| acc + &s)
.unwrap_or_default()
})
.unwrap_or(s)
}

#[derive(Clone, Debug, Default, PartialEq, Eq)]
/// One sense of a word: an optional reading plus its English glosses.
#[derive(Clone, Debug, Default)]
pub struct Definition {
    // Pinyin reading for this sense, when one is known.
    pub pinyin: Option<String>,
    // English glosses for this sense.
    pub english: Vec<String>,
}

#[derive(Clone, Debug, Default, PartialEq, Eq)]
/// A priority score paired with an upper bound.
// NOTE(review): exact semantics of `val` vs `max` are not visible in this
// file — presumably `val` is the achieved score and `max` the scale it is
// measured against; confirm with the code that constructs `Priority`.
#[derive(Clone, Debug, Default)]
pub struct Priority {
    pub val: NotNan<f32>,
    pub max: NotNan<f32>,
}

#[derive(Clone, Debug, PartialEq, Eq)]
/// How a single character's writing is represented on a card.
#[derive(Clone, Debug)]
pub enum CharWriting {
    // One String per stroke — presumably SVG-style path data coming from
    // the graphics source; confirm against the producer (GraphicsEntry).
    Strokes(Vec<String>),
    // Fallback when no stroke data is available: just the character itself.
    Char(char),
}

#[derive(Clone, Debug, Default, Eq)]
pub struct Entry {
#[derive(Clone, Debug, Default)]
pub struct WordEntry {
pub id: String,
pub pinyin: Vec<String>,
pub definitions: Vec<Definition>,
pub freq: Vec<NotNan<f32>>,
pub hsk_lev: Option<u8>,
pub dependencies: Vec<String>,
pub dependencies: Vec<EntryId>,
pub writing: Vec<CharWriting>,
pub traditional: Option<String>,
pub audio_file: Option<std::path::PathBuf>,
}
impl Entry {
impl WordEntry {
pub fn total_priority(&self) -> NotNan<f32> {
let freq: NotNan<f32> = self.freq.iter().sum();
let hsk_lev = self.hsk_lev.unwrap_or(10);
Expand All @@ -56,7 +69,7 @@ impl Entry {

hp * 0.5 + fp * 0.5
}
pub fn merge_add(&mut self, mut o: Entry) {
fn merge_inner(&mut self, mut o: Self) {
if !(self.writing.is_empty() || o.writing.is_empty()) {
eprintln!("{:?} {:?}", self, o.clone());
}
Expand All @@ -82,20 +95,96 @@ impl Entry {
self.hsk_lev = self.hsk_lev.take().or(o.hsk_lev);
}
}
impl Ord for Entry {
fn cmp(&self, other: &Self) -> Ordering {
self.total_priority().cmp(&other.total_priority())
impl Entry for WordEntry {
fn priority(&self) -> NotNan<f32> {
self.total_priority()
}
fn into_note(self, model: genanki_rs::Model) -> genanki_rs::Note {
todo!()
}
fn id(&self) -> EntryId {
EntryId::Word(self.id.clone())
}
fn dependencies(&self) -> Vec<EntryId> {
let mut deps = self.dependencies.clone();
if self.id.chars().count() > 1 {
for c in self.id.chars() {
let cs: EntryId = EntryId::Word(c.into());
if !deps.contains(&cs) {
deps.push(cs);
}
}
}
deps
}
fn merge(&mut self, o: CommonEntry) {
match o {
CommonEntry::WordEntry(we) => self.merge_inner(we),
_ => unreachable!(),
}
}
}

impl PartialOrd for Entry {
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
Some(self.cmp(other))
/// Placeholder entry type for pinyin-syllable cards.
///
/// Every `Entry` method is still a `todo!()` stub; unused parameters are
/// prefixed with `_` so the stubs compile without unused-variable warnings.
#[derive(Clone, Debug)]
pub struct SyllableEntry {}
impl Entry for SyllableEntry {
    fn priority(&self) -> NotNan<f32> {
        todo!()
    }
    fn into_note(self, _model: genanki_rs::Model) -> genanki_rs::Note {
        todo!()
    }
    fn id(&self) -> EntryId {
        todo!()
    }
    fn dependencies(&self) -> Vec<EntryId> {
        todo!()
    }
    fn merge(&mut self, _o: CommonEntry) {
        todo!()
    }
}

impl PartialEq for Entry {
fn eq(&self, other: &Self) -> bool {
self.total_priority().eq(&other.total_priority())
/// Placeholder entry type for grammar-point cards.
///
/// Every `Entry` method is still a `todo!()` stub; unused parameters are
/// prefixed with `_` so the stubs compile without unused-variable warnings.
#[derive(Clone, Debug)]
pub struct GrammarEntry {}
impl Entry for GrammarEntry {
    fn priority(&self) -> NotNan<f32> {
        todo!()
    }
    fn into_note(self, _model: genanki_rs::Model) -> genanki_rs::Note {
        todo!()
    }
    fn id(&self) -> EntryId {
        todo!()
    }
    fn dependencies(&self) -> Vec<EntryId> {
        todo!()
    }
    fn merge(&mut self, _o: CommonEntry) {
        todo!()
    }
}

// Sum type over every kind of deck entry. `#[enum_dispatch(Entry)]` makes
// this enum implement `Entry` by forwarding each call to the active variant;
// the bare `WordEntry` etc. are enum_dispatch's shorthand for tuple variants
// like `WordEntry(WordEntry)`, and it also generates the `From` impls used
// elsewhere in this crate (`.map(CommonEntry::from)`).
#[allow(clippy::enum_variant_names)]
#[enum_dispatch(Entry)]
#[derive(Debug, Clone)]
pub enum CommonEntry {
    WordEntry,
    SyllableEntry,
    GrammarEntry,
}

/// Typed identifier for an entry, keeping the id namespaces of the three
/// entry kinds distinct (hashable so it can key maps/sets).
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub enum EntryId {
    // For words the payload is the simplified form (see `WordEntry::id`).
    Word(String),
    Syllable(String),
    Grammar(String),
}
// `#[enum_dispatch]` marks the trait so enum_dispatch can generate the
// forwarding impl on `CommonEntry`.
#[enum_dispatch]
pub trait Entry {
    /// Higher priority means it should come earlier in the deck
    fn priority(&self) -> NotNan<f32>;
    /// Consumes the entry and renders it as an Anki note built on `model`.
    fn into_note(self, model: genanki_rs::Model) -> genanki_rs::Note;
    /// Identifier used for dependency resolution and for matching entries
    /// that should be merged.
    fn id(&self) -> EntryId;
    /// Ids of entries that should come before this one in the deck.
    fn dependencies(&self) -> Vec<EntryId>;
    /// Folds `o` into `self` — presumably only called when both share the
    /// same `id()` (the WordEntry impl panics on a non-Word argument).
    fn merge(&mut self, o: CommonEntry);
}
17 changes: 9 additions & 8 deletions src/freq.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ pub struct FreqRecord {
#[serde(rename = "Dominant.PoS")]
pos: String,
}
impl From<FreqRecord> for Entry {
impl From<FreqRecord> for WordEntry {
fn from(r: FreqRecord) -> Self {
let freq = r.wm / 1000000f32;
Self {
Expand All @@ -22,16 +22,17 @@ impl From<FreqRecord> for Entry {
}
}

pub fn get_records() -> Vec<FreqRecord> {
pub fn get_records() -> impl Iterator<Item = CommonEntry> {
let file = std::fs::File::open("res/SUBTLEX-CH.txt").unwrap();
let reader = std::io::BufReader::new(file);
let mut rdr = csv::ReaderBuilder::new()
.delimiter(b'\t')
.from_reader(reader);
let mut ans = vec![];
for result in rdr.deserialize() {
let record: FreqRecord = result.unwrap();
ans.push(record);
}
ans
let v: Vec<CommonEntry> = rdr
.deserialize::<FreqRecord>()
.map(|r| r.unwrap())
.map(WordEntry::from)
.map(CommonEntry::from)
.collect();
v.into_iter()
}
Loading

0 comments on commit ccfb01e

Please sign in to comment.