Commit 6dd9e252 authored by Jarrod

Update index.js

parent 1bae6756
const cedict = require('cedict-lookup')
const dedupe = require('dedupe')
/**
 * Greedy longest-match tokenizer that splits text into dictionary words.
 *
 * The dictionary is any object exposing `getMatch(text)` and
 * `getEntriesStartingWith(prefix)`, each returning an array of entries with
 * `traditional`, `simplified`, `pinyin` and `english` string fields
 * (presumably a cedict-lookup dictionary; confirm against the caller).
 */
class Tokenizer {
  /**
   * @param {object} dictionary - Lookup object described on the class.
   * @param {object} options - Stored on the instance; not read by this class.
   */
  constructor(dictionary, options) {
    this.dictionary = dictionary
    this.options = options
  }

  /**
   * Splits `text` into tokens, preferring the longest dictionary word at each
   * position and falling back to a single character when no word matches.
   *
   * A dictionary word equal to the whole input is never used as a token, so
   * the input is always compared against shorter candidates in that case.
   *
   * @param {string} text - Text to tokenize.
   * @returns {Array<{traditional: string, simplified: string, pinyin: (string|null), english: (string|null)}>}
   *   One object per token, in input order. `pinyin` and `english` are `null`
   *   when the dictionary has no match for the token; otherwise distinct
   *   readings are joined with '/' and distinct definitions with '\n'.
   */
  tokenize(text) {
    const result = []

    // Appends one token. The keys are tried in order until the dictionary
    // returns a match; the first key is the text as it appears in the input
    // and is what gets emitted verbatim when nothing matches.
    const pushEntry = (token, ...alternatives) => {
      let matches = []
      for (const key of [token, ...alternatives]) {
        matches = this.dictionary.getMatch(key)
        if (matches.length) {
          break
        }
      }

      if (!matches.length) {
        result.push({
          traditional: token,
          simplified: token,
          pinyin: null,
          english: null
        })
        return
      }

      // A Set keeps the first occurrence of each value in insertion order.
      const readings = [...new Set(matches.map(x => x.pinyin.trim().toLowerCase()))]
      const definitions = [...new Set(matches.map(x => x.english))]
      result.push({
        traditional: matches[0].traditional,
        simplified: matches[0].simplified,
        pinyin: readings.join('/'),
        english: definitions.join('\n')
      })
    }

    let i = 0
    while (i < text.length) {
      // First match two or more characters
      if (i !== text.length - 1) {
        const prefix = text.slice(i, i + 2)
        // Copy before sorting so the dictionary's own array is not reordered;
        // longest candidates come first.
        const candidates = [...this.dictionary.getEntriesStartingWith(prefix)]
          .sort((x, y) => y.traditional.length - x.traditional.length)

        let matched = null
        for (const candidate of candidates) {
          const slice = text.slice(i, i + candidate.traditional.length)
          // Accept the candidate when the input spells either of its forms.
          if (slice !== candidate.traditional && slice !== candidate.simplified) {
            continue
          }
          // Skip an entry that is the entire input (presumably so a lone
          // dictionary word is broken into its components; confirm intent).
          if (candidate.simplified === text || candidate.traditional === text) {
            continue
          }
          matched = slice
          // Look up the text as written first, then the entry's own forms, so
          // the match resolves whichever form the dictionary is keyed on.
          pushEntry(slice, candidate.traditional, candidate.simplified)
          break
        }

        if (matched !== null) {
          i += matched.length
          continue
        }
      }

      // If all fails, match one character. Step by code point so characters
      // outside the BMP are not split into two lone surrogates.
      const character = String.fromCodePoint(text.codePointAt(i))
      pushEntry(character)
      i += character.length
    }

    return result
  }
}
module.exports = function(path) {
let dictionary, options = {path}
dictionary = cedict.loadTraditional(path)
return new Tokenizer(dictionary, options)
}
const cedict = require('cedict-lookup')
const dedupe = require('dedupe')
/**
 * Greedy longest-match tokenizer that splits text into dictionary words.
 *
 * The dictionary is any object exposing `getMatch(text)` and
 * `getEntriesStartingWith(prefix)`, each returning an array of entries with
 * `traditional`, `simplified`, `pinyin` and `english` string fields
 * (presumably a cedict-lookup dictionary; confirm against the caller).
 */
class Tokenizer {
  /**
   * @param {object} dictionary - Lookup object described on the class.
   * @param {object} options - Stored on the instance; not read by this class.
   */
  constructor(dictionary, options) {
    this.dictionary = dictionary
    this.options = options
  }

  /**
   * Splits `text` into tokens, preferring the longest dictionary word at each
   * position and falling back to a single character when no word matches.
   *
   * A dictionary word equal to the whole input is never used as a token, so
   * the input is always compared against shorter candidates in that case.
   *
   * @param {string} text - Text to tokenize.
   * @returns {Array<{traditional: string, simplified: string, pinyin: (string|null), english: (string|null)}>}
   *   One object per token, in input order. `pinyin` and `english` are `null`
   *   when the dictionary has no match for the token; otherwise distinct
   *   readings are joined with '/' and distinct definitions with '\n'.
   */
  tokenize(text) {
    const result = []

    // Appends one token. The keys are tried in order until the dictionary
    // returns a match; the first key is the text as it appears in the input
    // and is what gets emitted verbatim when nothing matches.
    const pushEntry = (token, ...alternatives) => {
      let matches = []
      for (const key of [token, ...alternatives]) {
        matches = this.dictionary.getMatch(key)
        if (matches.length) {
          break
        }
      }

      if (!matches.length) {
        result.push({
          traditional: token,
          simplified: token,
          pinyin: null,
          english: null
        })
        return
      }

      // A Set keeps the first occurrence of each value in insertion order.
      const readings = [...new Set(matches.map(x => x.pinyin.trim().toLowerCase()))]
      const definitions = [...new Set(matches.map(x => x.english))]
      result.push({
        traditional: matches[0].traditional,
        simplified: matches[0].simplified,
        pinyin: readings.join('/'),
        english: definitions.join('\n')
      })
    }

    let i = 0
    while (i < text.length) {
      // First match two or more characters
      if (i !== text.length - 1) {
        const prefix = text.slice(i, i + 2)
        // Copy before sorting so the dictionary's own array is not reordered;
        // longest candidates come first.
        const candidates = [...this.dictionary.getEntriesStartingWith(prefix)]
          .sort((x, y) => y.traditional.length - x.traditional.length)

        let matched = null
        for (const candidate of candidates) {
          const slice = text.slice(i, i + candidate.traditional.length)
          // Accept the candidate when the input spells either of its forms.
          if (slice !== candidate.traditional && slice !== candidate.simplified) {
            continue
          }
          // Skip an entry that is the entire input (presumably so a lone
          // dictionary word is broken into its components; confirm intent).
          if (candidate.simplified === text || candidate.traditional === text) {
            continue
          }
          matched = slice
          // Look up the text as written first, then the entry's own forms, so
          // the match resolves whichever form the dictionary is keyed on.
          pushEntry(slice, candidate.traditional, candidate.simplified)
          break
        }

        if (matched !== null) {
          i += matched.length
          continue
        }
      }

      // If all fails, match one character. Step by code point so characters
      // outside the BMP are not split into two lone surrogates.
      const character = String.fromCodePoint(text.codePointAt(i))
      pushEntry(character)
      i += character.length
    }

    return result
  }
}
module.exports = function(path, variant) {
let dictionary, options = {path}
if (variant === 'simplified') {
dictionary = cedict.loadSimplified(path)
} else {
dictionary = cedict.loadTraditional(path)
}
return new Tokenizer(dictionary, options)
}
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment