node_modules
npm-debug.log
package-lock.json
coverage
cache:
  key: "$CI_BUILD_REF"
  paths:
    - node_modules/

test:
  stage: test
  script:
    - npm install
    - npm run lint
    - npm run test:coverage
  coverage: '/All\s+files\s+[|]\s+([\d.]+)/'
  artifacts:
    paths:
      - ./coverage
      - ./doc

pages:
  stage: deploy
  script:
    - mv doc public
    - mv coverage public/coverage/
  artifacts:
    paths:
      - public
    expire_in: 30 days
  only:
    - master
# @nahanil/zh-tokenizer
Based on `chinese-tokenizer`.
Tokenizes Chinese texts into words using [CC-CEDICT](https://cc-cedict.org/).
Extended from https://github.com/takumif/cedict-lookup
## Installation
Use npm to install:
~~~
npm install @nahanil/zh-tokenizer --save
~~~
## Updated Usage
Make sure to provide the [CC-CEDICT](https://cc-cedict.org/) data.
Will not work with simplified characters.
~~~js
const tokenizer = require('@nahanil/zh-tokenizer')('./cedict.txt')
console.log(tokenizer.tokenize('我是中国人。'))
~~~
Make sure to provide the [CC-CEDICT](https://cc-cedict.org/) data.
~~~js
const tokenizer = require('@nahanil/zh-tokenizer')('./cedict.txt')
console.log(tokenizer.tokenize('我是中国人。'))
~~~
~~~js
const tokenizer = require('@nahanil/zh-tokenizer')('./cedict.txt', 'traditional')
console.log(tokenizer.tokenize('我是中國人。'))
~~~
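
For reference, here is a sketch of the token shape `tokenize()` returns, based on the `index.js` shown further down; the exact `pinyin` and `english` values depend on the CC-CEDICT file you supply. When a word has several dictionary entries, their pinyin readings are deduplicated and joined with `/` and their glosses with newlines; characters not found in the dictionary come back with `pinyin` and `english` set to `null`.

~~~js
// Illustrative output only -- actual values come from your cedict.txt:
// [
//   { traditional: '我', simplified: '我', pinyin: 'wo3', english: 'I/me/my' },
//   ...
//   { traditional: '。', simplified: '。', pinyin: null, english: null }
// ]
~~~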
const cedict = require('cedict-lookup')
const dedupe = require('dedupe')
class Tokenizer {
constructor(dictionary, options) {
this.dictionary = dictionary
this.options = options
}
tokenize(text, allowSingleMultiCharWords) {
let result = []
let i = 0
let pushEntry = text => {
let matches = this.dictionary.getMatch(text)
if (!matches.length) {
result.push({
traditional: text,
simplified: text,
pinyin: null,
english: null
})
} else {
let rawPinyin = dedupe(matches.map(x => x.pinyin.trim().toLowerCase()))
result.push({
traditional: matches[0].traditional,
simplified: matches[0].simplified,
pinyin: rawPinyin.join('/'),
english: dedupe(matches.map(x => x.english)).join('\n')
})
}
}
while (i < text.length) {
// First match two or more characters
if (i != text.length - 1) {
let getTwo = text.slice(i, i + 2)
let entries = this.dictionary.getEntriesStartingWith(getTwo)
let entry
entries.sort((x, y) => y['traditional'].length - x['traditional'].length)
for (let j = 0; j < entries.length; j++) {
let slice = text.slice(i, i + entries[j]['traditional'].length)
if (slice != entries[j]['traditional'] && slice != entries[j]['traditional']) {
continue
}
// JAL - Added IF
if (allowSingleMultiCharWords || (entries[j]['simplified'] != text && entries[j]['traditional'] != text)) {
entry = entries[j]
pushEntry(entry['traditional'])
break
}
}
if (entry) {
i += entry['traditional'].length
continue
}
}
// If all fails, match one character
let character = text[i]
pushEntry(character)
i++
}
return result
}
}
module.exports = function(path, variant) {
let dictionary, options = {path}
if (variant === 'simplified') {
dictionary = cedict.loadSimplified(path)
} else {
dictionary = cedict.loadTraditional(path)
}
return new Tokenizer(dictionary, options)
}
const cedict = require('cedict-lookup')
const dedupe = require('dedupe')
class Tokenizer {
constructor (dictionary, options) {
this.dictionary = dictionary
this.options = options
}
tokenize (text, allowSingleMultiCharWords) {
let result = []
let i = 0
let pushEntry = text => {
let matches = this.dictionary.getMatch(text)
if (!matches.length) {
result.push({
traditional: text,
simplified: text,
pinyin: null,
english: null
})
} else {
let rawPinyin = dedupe(matches.map(x => x.pinyin.trim().toLowerCase()))
result.push({
traditional: matches[0].traditional,
simplified: matches[0].simplified,
pinyin: rawPinyin.join('/'),
english: dedupe(matches.map(x => x.english)).join('\n')
})
}
}
while (i < text.length) {
// First match two or more characters
if (i !== text.length - 1) {
let getTwo = text.slice(i, i + 2)
let entries = this.dictionary.getEntriesStartingWith(getTwo)
let entry
entries.sort((x, y) => y['traditional'].length - x['traditional'].length)
for (let j = 0; j < entries.length; j++) {
let slice = text.slice(i, i + entries[j]['traditional'].length)
if (slice !== entries[j]['simplified'] && slice !== entries[j]['traditional']) {
continue
}
          // JAL: only match the entire input as a single multi-character word when allowSingleMultiCharWords is set; otherwise keep splitting it
if (allowSingleMultiCharWords || (entries[j]['simplified'] !== text && entries[j]['traditional'] !== text)) {
entry = entries[j]
pushEntry(entry[this.options.variant])
break
}
}
if (entry) {
i += entry[this.options.variant].length
continue
}
}
// If all fails, match one character
let character = text[i]
pushEntry(character)
i++
}
return result
}
}
module.exports = function (path, variant) {
const options = { path, variant: variant || 'traditional' }
let dictionary
if (variant === 'simplified') {
dictionary = cedict.loadSimplified(path)
} else {
dictionary = cedict.loadTraditional(path)
}
return new Tokenizer(dictionary, options)
}
.vscode
*.ts
The MIT License (MIT)
Copyright (c) 2016 Takumi Fujimoto
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
## Usage
```javascript
var cedict = require('cedict-lookup');
var dict = cedict.loadTraditional('path/to/your/cedict_ts.u8');
// var dict = cedict.loadSimplified('path/to/your/cedict_ts.u8');
console.log(
dict.getMatch('你好')
);
// [ Entry {
// traditional: '你好',
// simplified: '你好',
// pinyin: 'ni3 hao3',
// english: 'Hello!/Hi!/How are you?' } ]
console.log(
dict.getEntriesStartingWith('中文')
);
// [ Entry {
// traditional: '中文',
// simplified: '中文',
// pinyin: 'Zhong1 wen2',
// english: 'Chinese/Chinese written language/Chinese writing' },
// Entry {
// traditional: '中文標準交換碼',
// simplified: '中文标准交换码',
// pinyin: 'Zhong1 wen2 biao1 zhun3 jiao1 huan4 ma3',
// english: 'CSIC, Chinese standard interchange code used from 1992' } ]
console.log(
dict.getPrefixEntries('小籠包')
);
// [ Entry {
// traditional: '小',
// simplified: '小',
// pinyin: 'xiao3',
// english: 'small/tiny/few/young' },
// Entry {
// traditional: '小籠包',
// simplified: '小笼包',
// pinyin: 'xiao3 long2 bao1',
// english: 'steamed dumpling' } ]
```
## License
The MIT License
{
"name": "cedict-lookup",
"version": "0.0.2",
"description": "Look up Chinese words in CC-CEDICT",
"main": "src/cedict.js",
"scripts": {
"test": "node test.js"
},
"repository": {
"type": "git",
"url": "git+https://github.com/takumif/cedict-lookup.git"
},
"keywords": [
"Chinese",
"Dictionary",
"CC-CEDICT"
],
"author": {
"name": "Takumi Fujimoto"
},
"license": "MIT",
"bugs": {
"url": "https://github.com/takumif/cedict-lookup/issues"
},
"homepage": "https://github.com/takumif/cedict-lookup#readme",
"typings": "cedict.d.ts",
"gitHead": "1ff8081a68c38f7ba0fe49853c1b09a31e727efe",
"_id": "cedict-lookup@0.0.2",
"_shasum": "197e8ecaa04936a9499f73d99496a98b110e814b",
"_from": "cedict-lookup@0.0.2",
"_npmVersion": "3.3.6",
"_nodeVersion": "5.0.0",
"_npmUser": {
"name": "takumi",
"email": "takumif@outlook.com"
},
"dist": {
"shasum": "197e8ecaa04936a9499f73d99496a98b110e814b",
"tarball": "https://registry.npmjs.org/cedict-lookup/-/cedict-lookup-0.0.2.tgz"
},
"maintainers": [
{
"name": "takumi",
"email": "takumif@outlook.com"
}
],
"_npmOperationalInternal": {
"host": "packages-16-east.internal.npmjs.com",
"tmp": "tmp/cedict-lookup-0.0.2.tgz_1463013845874_0.8693226873874664"
},
"directories": {},
"_resolved": "https://registry.npmjs.org/cedict-lookup/-/cedict-lookup-0.0.2.tgz"
}
"use strict";
var parser_1 = require("./parser");
/**
* An implementation of Cedict using the prefix tree data structure.
* Each node (except for the root) contains a character, and contains a list of
* entries formed by the characters in the path from the root to the node.
* It uses the traditional attribute as the lookup key into the tree.
*/
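// Illustrative layout, inferred from insertEntry() below rather than from upstream docs:
// inserting 中文 and 中文標準交換碼 builds the chain root -> "中" -> "中文" -> "中文標" -> ...,
// each entry being stored on the node whose word equals its full headword, so
// getEntriesStartingWith("中文") collects both entries from that subtree.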
var Cedict = (function () {
function Cedict(filename, trad) {
var entries = parser_1.CedictParser.parse(filename);
this.traditional = trad;
this.root = new CedictNode("");
this.populateTree(entries);
}
Cedict.prototype.getMatch = function (query) {
var node = this.getNodeForWord(query);
return node != null ? node.entries : [];
};
Cedict.prototype.getEntriesStartingWith = function (query) {
var node = this.getNodeForWord(query);
return node != null ? this.gatherEntriesUnderNode(node) : [];
};
/**
* E.g. for a query of "我們是" this will return entries for 我 and 我們
*/
Cedict.prototype.getPrefixEntries = function (query) {
var node = this.root;
var entries = [];
for (var i = 0; i < query.length; i++) {
var nextChar = query[i];
if (node.suffixes[nextChar] === undefined) {
break;
}
node = node.suffixes[nextChar];
Array.prototype.push.apply(entries, node.entries);
}
return entries;
};
Cedict.prototype.populateTree = function (entries) {
for (var i = 0; i < entries.length; i++) {
this.insertEntry(entries[i]);
}
};
Cedict.prototype.insertEntry = function (entry) {
var node = this.root;
var characters = this.traditional ? entry.traditional : entry.simplified;
while (node.word != characters) {
var nextChar = characters[node.word.length];
if (node.suffixes[nextChar] === undefined) {
// never seen this character sequence before, so make a node for it
node.suffixes[nextChar] = new CedictNode(node.word + nextChar);
}
node = node.suffixes[nextChar];
}
node.entries.push(entry);
};
Cedict.prototype.gatherEntriesUnderNode = function (node) {
if (node == null) {
return [];
}
var entries = [];
Array.prototype.push.apply(entries, node.entries);
// get the entries from all the child nodes
for (var suffix in node.suffixes) {
Array.prototype.push.apply(entries, this.gatherEntriesUnderNode(node.suffixes[suffix]));
}
return entries;
};
/**
* Returns null if the node is not found
*/
Cedict.prototype.getNodeForWord = function (word) {
var node = this.root;
for (var i = 0; i < word.length; i++) {
var nextChar = word[i];
if (node.suffixes[nextChar] === undefined) {
return null;
}
node = node.suffixes[nextChar];
}
return node;
};
return Cedict;
}());
var CedictNode = (function () {
function CedictNode(w) {
this.word = w;
this.entries = [];
this.suffixes = {};
}
return CedictNode;
}());
function loadTraditional(filename) {
return new Cedict(filename, true);
}
exports.loadTraditional = loadTraditional;
function loadSimplified(filename) {
return new Cedict(filename, false);
}
exports.loadSimplified = loadSimplified;
"use strict";
var Entry = (function () {
function Entry(trad, simpl, pinyin, english) {
this.traditional = trad;
this.simplified = simpl;
this.pinyin = pinyin;
this.english = english;
}
return Entry;
}());
exports.Entry = Entry;
"use strict";
var fs_1 = require("fs");
var entry_1 = require("./entry");
var CedictParser = (function () {
function CedictParser() {
}
/**
* Parses a CEDICT text file into a list of entries
*/
CedictParser.parse = function (file) {
var text = fs_1.readFileSync(file, "utf-8");
return CedictParser.parseCedictText(text);
};
CedictParser.parseCedictText = function (text) {
var lines = text.split("\n");
var entries = [];
for (var i = 0; i < lines.length; i++) {
var line = lines[i];
// ignore non-entry lines
if (line.length === 0 || line[0] === "#") {
continue;
}
entries.push(CedictParser.parseCedictLine(line));
}
return entries;
};
CedictParser.parseCedictLine = function (line) {
// Entries have this format:
// TRADITIONAL SIMPLIFIED [PINYIN] /ENGLISH 1/ENGLISH 2/
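        // e.g. (matching the README example above): 你好 你好 [ni3 hao3] /Hello!/Hi!/How are you?/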
var firstSpace = line.indexOf(" ");
var secondSpace = line.indexOf(" ", firstSpace + 1);
var leftBracket = line.indexOf("[");
var rightBracket = line.indexOf("]");
var firstSlash = line.indexOf("/");
var lastNonSlashChar = line.length - 2;
var traditional = line.substr(0, firstSpace);
var simplified = line.substr(firstSpace + 1, secondSpace - firstSpace - 1);
var pinyin = line.substr(leftBracket + 1, rightBracket - leftBracket - 1);
var english = line.substr(firstSlash + 1, lastNonSlashChar - firstSlash - 1);
return new entry_1.Entry(traditional, simplified, pinyin, english);
};
return CedictParser;
}());
exports.CedictParser = CedictParser;
{
"compilerOptions": {
"module": "commonjs",
"target": "es5"
},
"files": [
"src/cedict.ts",
"src/parser.ts",
"src/entry.ts"
]
}
MIT License
Copyright (c) 2013 Manuel Ernst
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
# dedupe [![Build Status](https://travis-ci.org/seriousManual/dedupe.png)](https://travis-ci.org/seriousManual/dedupe)
[![NPM](https://nodei.co/npm/dedupe.png)](https://nodei.co/npm/dedupe/)
[![NPM](https://nodei.co/npm-dl/dedupe.png?months=12)](https://nodei.co/npm/dedupe/)
removes duplicates from your array.
## Installation
````bash
$ npm install dedupe
````
## Usage
### primitive types
```javascript
var dedupe = require('dedupe')
var a = [1, 2, 2, 3]
var b = dedupe(a)
console.log(b)
//result: [1, 2, 3]
```
### complex types
Here the string representation of the object is used for comparison. The mechanism is similar to `JSON.stringify`, but a bit more efficient.
That means that `{}` is considered equal to `{}`.
```javascript
var dedupe = require('dedupe')
var aa = [{a: 2}, {a: 1}, {a: 1}, {a: 1}]
var bb = dedupe(aa)
console.log(bb)
//result: [{a: 2}, {a: 1}]
```
### complex types with custom hasher
```javascript
var dedupe = require('dedupe')
var aaa = [{a: 2, b: 1}, {a: 1, b: 2}, {a: 1, b: 3}, {a: 1, b: 4}]
var bbb = dedupe(aaa, value => value.a)
console.log(bbb)
//result: [{a: 2, b: 1}, {a: 1,b: 2}]
```
var sigmund = require('sigmund')
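// Keeps the first occurrence of each element; equality is decided by the string key
// the hasher produces (sigmund by default, or a custom hasher as shown in the README).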
function dedupe(client, hasher) {
hasher = hasher || sigmund
var clone = []
var lookup = {}
for (var i = 0; i < client.length; i++) {
var elem = client[i]
var hashed = hasher(elem)
if (!lookup[hashed]) {
clone.push(elem)
lookup[hashed] = true
}
}
return clone
}
module.exports = dedupe
The ISC License
Copyright (c) Isaac Z. Schlueter and Contributors
Permission to use, copy, modify, and/or distribute this software for any
purpose with or without fee is hereby granted, provided that the above
copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR
IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
# sigmund
Quick and dirty signatures for Objects.
This is like a much faster `deepEquals` comparison, which returns a
string key suitable for caches and the like.
## Usage
```javascript
function doSomething (someObj) {
var key = sigmund(someObj, maxDepth) // max depth defaults to 10
var cached = cache.get(key)
if (cached) return cached
var result = expensiveCalculation(someObj)
cache.set(key, result)
return result
}
```
The resulting key will be as unique and reproducible as calling
`JSON.stringify` or `util.inspect` on the object, but is much faster.
In order to achieve this speed, some differences are glossed over.
For example, the object `{0:'foo'}` will be treated identically to the
array `['foo']`.
Also, just as there is no way to summon the soul from the scribblings
of a cocaine-addled psychoanalyst, there is no way to revive the object
from the signature string that sigmund gives you. In fact, it's
barely even readable.
As with `util.inspect` and `JSON.stringify`, larger objects will
produce larger signature strings.
Because sigmund is a bit less strict than the more thorough
alternatives, the strings will be shorter, and also there is a
slightly higher chance for collisions. For example, these objects
have the same signature:
```javascript
var obj1 = {a:'b',c:/def/,g:['h','i',{j:'',k:'l'}]}
var obj2 = {a:'b',c:'/def/',g:['h','i','{jkl']}
```
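A minimal way to check both of the claims above; the expected signature string for these two objects is taken from this package's own test suite.
```javascript
var assert = require('assert')
var sigmund = require('sigmund')

var obj1 = {a:'b',c:/def/,g:['h','i',{j:'',k:'l'}]}
var obj2 = {a:'b',c:'/def/',g:['h','i','{jkl']}

assert.equal(sigmund(obj1), '{abc/def/g{0h1i2{jkl')
assert.equal(sigmund(obj1), sigmund(obj2))          // the collision described above
assert.equal(sigmund({0: 'foo'}), sigmund(['foo'])) // {0:'foo'} and ['foo'] look identical
```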
Like a good Freudian, sigmund is most effective when you already have
some understanding of what you're looking for. It can help you help
yourself, but you must be willing to do some work as well.
Cycles are handled, and cyclical objects are silently omitted (though
the key is included in the signature output.)
The second argument is the maximum depth, which defaults to 10,
because that is the maximum object traversal depth covered by most
insurance carriers.
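As a rough sketch of what the depth limit does (not an official example, just a trace against the `sigmund.js` vendored here):
```javascript
var sigmund = require('sigmund')

var nested = {a: {b: {c: 'd'}}}
sigmund(nested, 1) // => '{a'      -- only the top level is walked
sigmund(nested)    // => '{a{b{cd' -- the default depth of 10 reaches everything
```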
// different ways to id objects
// use a req/res pair, since it's crazy deep and cyclical
// sparseFE10 and sigmund are usually pretty close, which is to be expected,
// since they are essentially the same algorithm, except that sigmund handles
// regular expression objects properly.
var http = require('http')
var util = require('util')
var sigmund = require('./sigmund.js')
var sreq, sres, creq, cres, test
http.createServer(function (q, s) {
sreq = q
sres = s
sres.end('ok')
this.close(function () { setTimeout(function () {
start()
}, 200) })
}).listen(1337, function () {
creq = http.get({ port: 1337 })
creq.on('response', function (s) { cres = s })
})
function start () {
test = [sreq, sres, creq, cres]
// test = sreq
// sreq.sres = sres
// sreq.creq = creq
// sreq.cres = cres
for (var i in exports.compare) {
console.log(i)
var hash = exports.compare[i]()
console.log(hash)
console.log(hash.length)
console.log('')
}
require('bench').runMain()
}
function customWs (obj, md, d) {
d = d || 0
var to = typeof obj
if (to === 'undefined' || to === 'function' || to === null) return ''
if (d > md || !obj || to !== 'object') return ('' + obj).replace(/[\n ]+/g, '')
if (Array.isArray(obj)) {
return obj.map(function (i, _, __) {
return customWs(i, md, d + 1)
}).reduce(function (a, b) { return a + b }, '')
}
var keys = Object.keys(obj)
return keys.map(function (k, _, __) {
return k + ':' + customWs(obj[k], md, d + 1)
}).reduce(function (a, b) { return a + b }, '')
}
function custom (obj, md, d) {
d = d || 0
var to = typeof obj
if (to === 'undefined' || to === 'function' || to === null) return ''
if (d > md || !obj || to !== 'object') return '' + obj
if (Array.isArray(obj)) {
return obj.map(function (i, _, __) {
return custom(i, md, d + 1)
}).reduce(function (a, b) { return a + b }, '')
}
var keys = Object.keys(obj)
return keys.map(function (k, _, __) {
return k + ':' + custom(obj[k], md, d + 1)
}).reduce(function (a, b) { return a + b }, '')
}
function sparseFE2 (obj, maxDepth) {
var seen = []
var soFar = ''
function ch (v, depth) {
if (depth > maxDepth) return
if (typeof v === 'function' || typeof v === 'undefined') return
if (typeof v !== 'object' || !v) {
soFar += v
return
}
if (seen.indexOf(v) !== -1 || depth === maxDepth) return
seen.push(v)
soFar += '{'
Object.keys(v).forEach(function (k, _, __) {
// pseudo-private values. skip those.
if (k.charAt(0) === '_') return
var to = typeof v[k]
if (to === 'function' || to === 'undefined') return
soFar += k + ':'
ch(v[k], depth + 1)
})
soFar += '}'
}
ch(obj, 0)
return soFar
}
function sparseFE (obj, maxDepth) {
var seen = []
var soFar = ''
function ch (v, depth) {
if (depth > maxDepth) return
if (typeof v === 'function' || typeof v === 'undefined') return
if (typeof v !== 'object' || !v) {
soFar += v
return
}
if (seen.indexOf(v) !== -1 || depth === maxDepth) return
seen.push(v)
soFar += '{'
Object.keys(v).forEach(function (k, _, __) {
// pseudo-private values. skip those.
if (k.charAt(0) === '_') return
var to = typeof v[k]
if (to === 'function' || to === 'undefined') return
soFar += k
ch(v[k], depth + 1)
})
}
ch(obj, 0)
return soFar
}
function sparse (obj, maxDepth) {
var seen = []
var soFar = ''
function ch (v, depth) {
if (depth > maxDepth) return
if (typeof v === 'function' || typeof v === 'undefined') return
if (typeof v !== 'object' || !v) {
soFar += v
return
}
if (seen.indexOf(v) !== -1 || depth === maxDepth) return
seen.push(v)
soFar += '{'
for (var k in v) {
// pseudo-private values. skip those.
if (k.charAt(0) === '_') continue
var to = typeof v[k]
if (to === 'function' || to === 'undefined') continue
soFar += k
ch(v[k], depth + 1)
}
}
ch(obj, 0)
return soFar
}
function noCommas (obj, maxDepth) {
var seen = []
var soFar = ''
function ch (v, depth) {
if (depth > maxDepth) return
if (typeof v === 'function' || typeof v === 'undefined') return
if (typeof v !== 'object' || !v) {
soFar += v
return
}
if (seen.indexOf(v) !== -1 || depth === maxDepth) return
seen.push(v)
soFar += '{'
for (var k in v) {
// pseudo-private values. skip those.
if (k.charAt(0) === '_') continue
var to = typeof v[k]
if (to === 'function' || to === 'undefined') continue
soFar += k + ':'
ch(v[k], depth + 1)
}
soFar += '}'
}
ch(obj, 0)
return soFar
}
function flatten (obj, maxDepth) {
var seen = []
var soFar = ''
function ch (v, depth) {
if (depth > maxDepth) return
if (typeof v === 'function' || typeof v === 'undefined') return
if (typeof v !== 'object' || !v) {
soFar += v
return
}
if (seen.indexOf(v) !== -1 || depth === maxDepth) return
seen.push(v)
soFar += '{'
for (var k in v) {
// pseudo-private values. skip those.
if (k.charAt(0) === '_') continue
var to = typeof v[k]
if (to === 'function' || to === 'undefined') continue
soFar += k + ':'
ch(v[k], depth + 1)
soFar += ','
}
soFar += '}'
}
ch(obj, 0)
return soFar
}
exports.compare =
{
// 'custom 2': function () {
// return custom(test, 2, 0)
// },
// 'customWs 2': function () {
// return customWs(test, 2, 0)
// },
'JSON.stringify (guarded)': function () {
var seen = []
return JSON.stringify(test, function (k, v) {
if (typeof v !== 'object' || !v) return v
if (seen.indexOf(v) !== -1) return undefined
seen.push(v)
return v
})
},
'flatten 10': function () {
return flatten(test, 10)
},
// 'flattenFE 10': function () {
// return flattenFE(test, 10)
// },
'noCommas 10': function () {
return noCommas(test, 10)
},
'sparse 10': function () {
return sparse(test, 10)
},
'sparseFE 10': function () {
return sparseFE(test, 10)
},
'sparseFE2 10': function () {
return sparseFE2(test, 10)
},
sigmund: function() {
return sigmund(test, 10)
},
// 'util.inspect 1': function () {
// return util.inspect(test, false, 1, false)
// },
// 'util.inspect undefined': function () {
// util.inspect(test)
// },
// 'util.inspect 2': function () {
// util.inspect(test, false, 2, false)
// },
// 'util.inspect 3': function () {
// util.inspect(test, false, 3, false)
// },
// 'util.inspect 4': function () {
// util.inspect(test, false, 4, false)
// },
// 'util.inspect Infinity': function () {
// util.inspect(test, false, Infinity, false)
// }
}
/** results
**/
{
"name": "sigmund",
"version": "1.0.1",
"description": "Quick and dirty signatures for Objects.",
"main": "sigmund.js",
"directories": {
"test": "test"
},
"dependencies": {},
"devDependencies": {
"tap": "~0.3.0"
},
"scripts": {
"test": "tap test/*.js",
"bench": "node bench.js"
},
"repository": {
"type": "git",
"url": "git://github.com/isaacs/sigmund.git"
},
"keywords": [
"object",
"signature",
"key",
"data",
"psychoanalysis"
],
"author": {
"name": "Isaac Z. Schlueter",
"email": "i@izs.me",
"url": "http://blog.izs.me/"
},
"license": "ISC",
"gitHead": "527f97aa5bb253d927348698c0cd3bb267d098c6",
"bugs": {
"url": "https://github.com/isaacs/sigmund/issues"
},
"homepage": "https://github.com/isaacs/sigmund#readme",
"_id": "sigmund@1.0.1",
"_shasum": "3ff21f198cad2175f9f3b781853fd94d0d19b590",
"_from": "sigmund@1.0.1",
"_npmVersion": "2.10.0",
"_nodeVersion": "2.0.1",
"_npmUser": {
"name": "isaacs",
"email": "isaacs@npmjs.com"
},
"dist": {
"shasum": "3ff21f198cad2175f9f3b781853fd94d0d19b590",
"tarball": "http://registry.npmjs.org/sigmund/-/sigmund-1.0.1.tgz"
},
"maintainers": [
{
"name": "isaacs",
"email": "i@izs.me"
}
],
"_resolved": "https://registry.npmjs.org/sigmund/-/sigmund-1.0.1.tgz",
"readme": "ERROR: No README data found!"
}
module.exports = sigmund
function sigmund (subject, maxSessions) {
maxSessions = maxSessions || 10;
var notes = [];
var analysis = '';
var RE = RegExp;
function psychoAnalyze (subject, session) {
if (session > maxSessions) return;
if (typeof subject === 'function' ||
typeof subject === 'undefined') {
return;
}
if (typeof subject !== 'object' || !subject ||
(subject instanceof RE)) {
analysis += subject;
return;
}
if (notes.indexOf(subject) !== -1 || session === maxSessions) return;
notes.push(subject);
analysis += '{';
Object.keys(subject).forEach(function (issue, _, __) {
// pseudo-private values. skip those.
if (issue.charAt(0) === '_') return;
var to = typeof subject[issue];
if (to === 'function' || to === 'undefined') return;
analysis += issue;
psychoAnalyze(subject[issue], session + 1);
});
}
psychoAnalyze(subject, 0);
return analysis;
}
// vim: set softtabstop=4 shiftwidth=4:
var test = require('tap').test
var sigmund = require('../sigmund.js')
// occasionally there are duplicates
// that's an acceptable edge-case. JSON.stringify and util.inspect
// have some collision potential as well, though less, and collision
// detection is expensive.
var hash = '{abc/def/g{0h1i2{jkl'
var obj1 = {a:'b',c:/def/,g:['h','i',{j:'',k:'l'}]}
var obj2 = {a:'b',c:'/def/',g:['h','i','{jkl']}
var obj3 = JSON.parse(JSON.stringify(obj1))
obj3.c = /def/
obj3.g[2].cycle = obj3
var cycleHash = '{abc/def/g{0h1i2{jklcycle'
test('basic', function (t) {
t.equal(sigmund(obj1), hash)
t.equal(sigmund(obj2), hash)
t.equal(sigmund(obj3), cycleHash)
t.end()
})
{
"name": "dedupe",
"description": "easy deduplication of array values",
"version": "2.1.0",
"repository": {
"type": "git",
"url": "git+https://github.com/seriousManual/dedupe.git"
},
"author": {
"name": "Manuel Ernst",
"email": "zaphod84@gmx.de"
},
"license": "MIT",
"dependencies": {
"sigmund": "1.0.1"
},
"devDependencies": {
"chai": "^3.5",
"mocha": "^3.0.0"
},
"files": [
"index.js"
],
"main": "./index.js",
"keywords": [
"duplicates",
"array",
"remove duplicates",
"distinct",
"deduplicate",
"deduplication"
],
"scripts": {
"test": "./node_modules/mocha/bin/mocha tests"
},
"engines": {
"node": ">=4.0"
},
"gitHead": "b20dd9435dae987ca2ca0a83517f4b54e95f591a",
"bugs": {
"url": "https://github.com/seriousManual/dedupe/issues"
},
"homepage": "https://github.com/seriousManual/dedupe#readme",
"_id": "dedupe@2.1.0",
"_shasum": "7f201d0142ca38e48762cfc33ff79816818bc347",
"_from": "dedupe@>=2.0.3 <3.0.0",
"_npmVersion": "3.10.8",
"_nodeVersion": "4.1.0",
"_npmUser": {
"name": "zaphod1984",
"email": "zaphod84@gmx.de"
},
"dist": {
"shasum": "7f201d0142ca38e48762cfc33ff79816818bc347",
"tarball": "https://registry.npmjs.org/dedupe/-/dedupe-2.1.0.tgz"
},
"maintainers": [
{
"name": "zaphod1984",
"email": "zaphod84@gmx.de"
}
],
"_npmOperationalInternal": {
"host": "packages-12-west.internal.npmjs.com",
"tmp": "tmp/dedupe-2.1.0.tgz_1476247685190_0.19174485001713037"
},
"directories": {},
"_resolved": "https://registry.npmjs.org/dedupe/-/dedupe-2.1.0.tgz"
}
language: node_js
node_js:
  - "0.11"
  - "0.10"
branches:
  only:
    - master
before_script:
  - npm install -g istanbul
  - npm install coveralls
  - npm install mocha-lcov-reporter
after_script:
  - NODE_ENV=test istanbul cover
    ./node_modules/mocha/bin/_mocha --report lcovonly -- -R spec &&
    cat ./coverage/lcov.info |
    ./node_modules/coveralls/bin/coveralls.js && rm -rf ./coverage
# prettify-pinyin [![Build Status](https://travis-ci.org/johnheroy/prettify-pinyin.svg?branch=master)](https://travis-ci.org/johnheroy/prettify-pinyin) [![Coverage Status](https://img.shields.io/coveralls/johnheroy/prettify-pinyin.svg)](https://coveralls.io/r/johnheroy/prettify-pinyin)
prettify-pinyin will take your pinyin written with letters and numbers (i.e. 'ni3 hao3') and add tone marks, so you don't have to. I am using this module to show pretty formatting for the pronunciation entries of the CC-CEDICT dictionary, which are written with letters and numbers.
## Usage
```
var pinyin = require('prettify-pinyin');
pinyin.prettify('ni3 hao3'); // => 'nǐ hǎo'
pinyin.prettify('zhong1 guo2'); // => 'zhōng guó'
```
## License
MIT
// Quick guide for typing Chinese pinyin on Mac OS X
// Tone 1 (flat) mā – Option + a, then hit a vowel key
// Tone 2 (rising) má – Option + e, then hit a vowel key
// Tone 3 (falling-rising) mǎ – Option + v, then hit a vowel key
// Tone 4 (falling) mà – Option + `, then hit a vowel key
// ǚ – Option + V, then hit V (submitted by QA)
// ǜ – Option + `, then hit V (submitted by QA)
var replacements = {
'a': ['ā', 'á', 'ǎ', 'à'],
'e': ['ē', 'é', 'ě', 'è'],
'u': ['ū', 'ú', 'ǔ', 'ù'],
'i': ['ī', 'í', 'ǐ', 'ì'],
'o': ['ō', 'ó', 'ǒ', 'ò'],
'ü': ['ǖ', 'ǘ', 'ǚ', 'ǜ']
};
var medials = ['i', 'u', 'ü'];
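// Tone-mark placement (see the spelling-rule tests): if the first vowel is a medial
// (i, u, or ü) the mark goes on the vowel that follows it; otherwise it goes on the
// first vowel. A neutral tone (5) just drops the trailing number without adding a mark.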
var prettify = function(str){
str = str.replace('v', 'ü');
var syllables = str.split(' ');
for (var i = 0; i < syllables.length; i++){
var syllable = syllables[i];
var tone = parseInt(syllable[syllable.length-1]);
if (tone <= 0 || tone > 5) {
console.error('invalid tone number:', tone, 'in', syllable);
} else if (tone === 5){
syllables[i] = syllable.slice(0, syllable.length - 1);
} else {
for (var j = 0; j < syllable.length; j++){
var currentLetter = syllable[j];
var nextLetter = syllable[j + 1];
// found a vowel
if (replacements[currentLetter]){
var replaced;
var letterToReplace;
// two consecutive vowels
if (replacements[nextLetter] && medials.indexOf(currentLetter) >= 0){
letterToReplace = nextLetter;
} else {
letterToReplace = currentLetter;
}
replaced = syllable.replace(letterToReplace, replacements[letterToReplace][tone - 1]);
syllables[i] = replaced.slice(0, replaced.length - 1);
break;
}
}
}
}
return syllables.join(' ');
};
module.exports.prettify = prettify;
{
"name": "prettify-pinyin",
"version": "0.1.5",
"description": "Turns 'ni3 hao3' into 'nǐ hǎo' etc",
"main": "index.js",
"scripts": {
"test": "mocha",
"cover": "istanbul cover ./node_modules/mocha/bin/_mocha --report html -- -R spec"
},
"repository": {
"type": "git",
"url": "https://github.com/johnheroy/prettify-pinyin.git"
},
"keywords": [
"Chinese",
"汉字",
"pinyin",
"拼音"
],
"author": {
"name": "John Heroy"
},
"license": "MIT",
"bugs": {
"url": "https://github.com/johnheroy/prettify-pinyin/issues"
},
"homepage": "https://github.com/johnheroy/prettify-pinyin",
"devDependencies": {
"coveralls": "^2.11.2",
"mocha": "1.21.x",
"mocha-lcov-reporter": "0.0.1"
},
"gitHead": "802c2056fbefdf959c9fe20c57d45f0e02d97e4f",
"_id": "prettify-pinyin@0.1.5",
"_shasum": "da1f9ea8f08768c23c830064296eac2be05bcb51",
"_from": "prettify-pinyin@>=0.1.5 <0.2.0",
"_npmVersion": "1.4.28",
"_npmUser": {
"name": "johnheroy",
"email": "johnheroy@gmail.com"
},
"maintainers": [
{
"name": "johnheroy",
"email": "johnheroy@gmail.com"
}
],
"dist": {
"shasum": "da1f9ea8f08768c23c830064296eac2be05bcb51",
"tarball": "https://registry.npmjs.org/prettify-pinyin/-/prettify-pinyin-0.1.5.tgz"
},
"directories": {},
"_resolved": "https://registry.npmjs.org/prettify-pinyin/-/prettify-pinyin-0.1.5.tgz"
}
var assert = require('assert');
var pinyin = require('../index');
// selected spelling rules check from MIT
// http://web.mit.edu/jinzhang/www/pinyin/spellingrules/
describe('Prettify Pinyin', function(){
describe('(1) Syllables with one vowel letter:', function(){
it('The tone mark is always on the vowel letter.', function(){
assert.equal(pinyin.prettify('lv4'), 'lǜ');
assert.equal(pinyin.prettify('zhi1'), 'zhī');
assert.equal(pinyin.prettify('shan1'), 'shān');
assert.equal(pinyin.prettify('ting1'), 'tīng');
});
});
describe('(2) Syllables with two or three vowel letters:', function(){
it('(i) If the first vowel letter is a medial, namely, "i", "u", or "ü", the tone mark is on the vowel letter immediately following the medial.', function(){
assert.equal(pinyin.prettify('jiao1'), 'jiāo');
assert.equal(pinyin.prettify('lve4'), 'lüè');
assert.equal(pinyin.prettify('jiu3'), 'jiǔ');
assert.equal(pinyin.prettify('gui4'), 'guì');
});
it('(ii) If the first vowel letter is not a medial, the tone mark is always on the first vowel letter.', function(){
assert.equal(pinyin.prettify('hai3'), 'hǎi');
assert.equal(pinyin.prettify('zhao3'), 'zhǎo');
assert.equal(pinyin.prettify('shou3'), 'shǒu');
assert.equal(pinyin.prettify('gei3'), 'gěi');
});
});
});
var version = "0.3.1"; // 版本
var flagSimp = "simplified"; // 簡體
var flagTrad = "traditional"; // 繁體
var t2s = require('./dict/t2s.json'); // 繁轉簡 對照表
var s2t = require('./dict/s2t.json'); // 簡轉繁 對照表
var maxSTLen = 1; // 簡轉繁 最長的詞句
var maxTSLen = 1; // 繁轉簡 最長的詞句
var TongWen = (function () {
var that = this;
return {
version : version,
flagSimp : flagSimp,
flagTrad : flagTrad,
addS2TTable : addS2TTable,
addT2STable : addT2STable,
convert : convert,
s2t : function (str) { return convert(str, flagTrad); },
t2s : function (str) { return convert(str, flagSimp); }
}
})();
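// Usage (illustrative): TongWen.s2t('简体') converts simplified to traditional ('簡體')
// and TongWen.t2s('繁體') converts traditional to simplified ('繁体'), both via convert()
// below -- assuming the bundled s2t/t2s tables cover these characters.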
// Add entries to the simplified-to-traditional mapping table
function addS2TTable(table) {
for (var i in table) {
maxSTLen = Math.max(maxSTLen, table[i].length);
s2t[i] = table[i];
}
}
// Add entries to the traditional-to-simplified mapping table
function addT2STable(table) {
for (var i in table) {
maxTSLen = Math.max(maxTSLen, table[i].length);
t2s[i] = table[i];
}
}
function setZhFlag(doc, zhflag) {
doc.documentElement.setAttribute("zhtongwen", zhflag);
};
function getZhFlag(doc) {
var zhflag = "";
if (doc && doc.documentElement) {
zhflag = doc.documentElement.getAttribute("zhtongwen");
if (zhflag == null) zhflag = "";
}
return zhflag;
};
// Convert between traditional and simplified Chinese
function convert(str, zhflag) {
var leng = 4;
var zmap = null;
if (zhflag == flagSimp) {
    // traditional to simplified
zmap = t2s;
leng = Math.min(maxTSLen, str.length);
} else {
    // simplified to traditional
zmap = s2t;
leng = Math.min(maxSTLen, str.length);
}
  // Convert character by character first
str = str.split("");
for (var i = 0, c = str.length; i < c; i++) {
str[i] = zmap[str[i]] || str[i];
}
str = str.join("");
  // Then convert multi-character words and phrases
var txt = "", s = "", bol = true;
for (var i = 0, c = str.length; i < c;) {
bol = true;
for (var j = leng; j > 1; j--) {
s = str.substr(i, j);
if (s in zmap) {
txt += zmap[s];
i += j;
bol = false;
break;
}
}
if (bol) {
txt += str.substr(i, 1);
i++;
}
}