Commit 148f0f84 authored by Jarrod's avatar Jarrod 💬

Moved stuff

parent b2836491
################################################
# ┌─┐┬┌┬┐╦╔═╗╔╗╔╔═╗╦═╗╔═╗
# │ ┬│ │ ║║ ╦║║║║ ║╠╦╝║╣
# o└─┘┴ ┴ ╩╚═╝╝╚╝╚═╝╩╚═╚═╝
#
# > Files to exclude from your app's repo.
#
# This file (`.gitignore`) is only relevant if
# you are using git.
#
# It exists to signify to git that certain files
# and/or directories should be ignored for the
# purposes of version control.
#
# This keeps tmp files and sensitive credentials
# from being uploaded to your repository. And
# it allows you to configure your app for your
# machine without accidentally committing settings
# which will smash the local settings of other
# developers on your team.
#
# Some reasonable defaults are included below,
# but, of course, you should modify/extend/prune
# to fit your needs!
#
################################################
package-lock.json
################################################
# Local Configuration
#
# Explicitly ignore files which contain:
#
# 1. Sensitive information you'd rather not push to
# your git repository.
# e.g., your personal API keys or passwords.
#
# 2. Developer-specific configuration
# Basically, anything that would be annoying
# to have to change every time you do a
# `git pull` on your laptop.
# e.g. your local development database, or
# the S3 bucket you're using for file uploads
# during development.
#
################################################
config/local.js
################################################
# Dependencies
#
#
# When releasing a production app, you _could_
# hypothetically include your node_modules folder
# in your git repo, but during development, it
# is always best to exclude it, since different
# developers may be working on different kernels,
# where dependencies would need to be recompiled
# anyway.
#
# Most of the time, the node_modules folder can
# be excluded from your code repository, even
# in production, thanks to features like the
# package-lock.json file / NPM shrinkwrap.
#
# But no matter what, since this is a Sails app,
# you should always push up the package-lock.json
# or shrinkwrap file to your repository, to avoid
# accidentally pulling in upgraded dependencies
# and breaking your code.
#
# That said, if you are having trouble with
# dependencies, (particularly when using
# `npm link`) this can be pretty discouraging.
# But rather than just adding the lockfile to
# your .gitignore, try this first:
# ```
# rm -rf node_modules
# rm package-lock.json
# npm install
# ```
#
# [?] For more tips/advice, come by and say hi
# over at https://sailsjs.com/support
#
################################################
node_modules
################################################
#
# > Do you use bower?
# > re: the bower_components dir, see this:
# > http://addyosmani.com/blog/checking-in-front-end-dependencies/
# > (credit Addy Osmani, @addyosmani)
#
################################################
################################################
# Temporary files generated by Sails/Waterline.
################################################
.tmp
################################################
# Miscellaneous
#
# Common files generated by text editors,
# operating systems, file systems, dbs, etc.
################################################
*~
*#
.DS_Store
.netbeans
nbproject
.idea
*.iml
.vscode
.node_history
dump.rdb
npm-debug.log
lib-cov
*.seed
*.log
*.out
*.pid
This diff is collapsed.
This diff is collapsed.
This source diff could not be displayed because it is too large. You can view the blob instead.
This diff is collapsed.
This source diff could not be displayed because it is too large. You can view the blob instead.
const mcache = require('memory-cache');
module.exports = {
friendlyName: 'Get admin dashboard',
description: '',
fn: async function(inputs) {
let dashboard = {
users: await User.count(),
words: await Word.count(),
wordLists: await WordList.count(),
searches24hr: await DictionarySearchLog.count({
createdAt: { '>=': new Date(Date.now() - (60 * 60 * 24 * 1000)) }
}),
cache: {
size: mcache.size()
}
}
if (typeof Comment !== 'undefined') {
dashboard.comments = await Comment.count()
}
return { dashboard }
}
}
\ No newline at end of file
module.exports = {
friendlyName: 'Get the daily idiom',
description: '',
inputs: {
},
fn: async function(inputs) {
// offset = (approx) number of days since 18 Dec 2018 UTC time
let offset = Math.floor(Date.now() / 8.64e7) - 17882;
let sql = `SELECT id, simplified, traditional, pinyin, definitions, "hskLevel"
FROM word
WHERE definitions &@ 'idiom'
AND length(traditional) = 4
ORDER BY id ASC
OFFSET ${offset}
LIMIT 1;`;
let result = await sails.sendNativeQuery(sql)
return result && result.rows && result.rows.length ? { idiom: result.rows[0] } : { idiom: null }
}
}
\ No newline at end of file
const util = require('util');
const decomp = require('@nahanil/zh-decomp');
const getUnihan = util.promisify(require('cjk-unihan').get);
module.exports = {
friendlyName: 'Get a dictionary entry',
description: '',
inputs: {
word: {
type: 'string',
required: true
},
full: {
type: 'ref',
defaultsTo: false
}
},
fn: async function(inputs) {
const { res } = this
const query = inputs.word
let response = {}
response.words = await Word.find({
where: {
or: [
{ simplified: query },
{ traditional: query },
]
}
})//.select(...)
.sort(['hskLevel ASC', 'frequency DESC', 'pinyin ASC'])
if (!response.words || !response.words.length) {
if (query.length === 1) {
return (await tryUnihanLookup(res, query))
}
return res.notFound()
}
let variants = [];
let allChars = [];
let charData = {};
let strokeData = {};
// response.words = words.map(async (w) => {
for (let i=0; i < response.words.length; i++) {
let w = response.words[i];
if (inputs.full) {
allChars.push(...w.traditional.split(''));
allChars.push(...w.simplified.split(''));
if (w.simplified.length === 1) {
if (w.variants) {
variants.push(...w.variants);
}
['simplified', 'traditional'].forEach((variant) => {
if (!charData[w[variant]]) {
let ucKey = variant === 'traditional' ? 'Traditional' : 'Simplified';
charData[w[variant]] = {
radical: w[`radical${ucKey}`],
strokes: w[`strokes${ucKey}`],
strokesMinRad: w[`strokesMinRad${ucKey}`],
decomp: decomp.lookup(w[variant])
};
}
});
}
}
response.words[i] = {
id: w.id,
traditional: w.traditional,
simplified: w.simplified,
definitions: w.definitions,
hskLevel: w.hskLevel || null,
pinyin: w.pinyin,
audio: await sails.helpers.zhdict.audio.getAudioUriForWord(w)
};
}
// Return a 'short version' of this data for the popup things
if (!inputs.full) {
return response
}
if (response.words[0].length !== 1) {
// Try tokenize/find components that makeup compound words
let parts = app.tokenizer_t.tokenize(response.words[0].traditional);
if (parts.length !== 1) {
response.parts = parts.map((r) => {
r.definitions = r.english ? r.english.split('/') : [];
delete r.english;
return r;
});
}
}
response.charData = charData;
if (variants && variants.length) {
response.variants = _.uniq(variants);
}
// Load stroke animation data
_.uniq(allChars).forEach((char) => {
try {
strokeData[char] = require('hanzi-writer-data/' + char);
} catch(e) {}
});
response.strokeData = strokeData;
// Try to sort results a bit lesss shitty-ly
if (response.words.length > 1) {
response.words = response.words.sort((a, b) => {
// return a - b
let x = a.definitions.filter(x => x.indexOf('variant') !== -1).length ? 1 : 0;
let y = b.definitions.filter(x => x.indexOf('variant') !== -1).length ? 1 : 0;
return x - y;
});
}
return response;
}
}
// Fall back to the Unihan database for single characters that are missing
// from the main dictionary. Responds via `res` on failure; returns a
// dictionary-entry-shaped response object on success.
async function tryUnihanLookup(res, query) {
  if (query.length !== 1) { return res.notFound('No entry found'); }

  // TODO Check the unihan stuff
  let char;
  try {
    char = await getUnihan(query);
  } catch (err) {
    sails.log.error('Unable to load unihan character data', err.stack)
    return res.serverError('Unable to load results')
  }
  if (!char) {
    return res.notFound('No entry found');
  }

  let response = {
    words: [{
      simplified: char.character,
      traditional: char.character,
      // kHanyuPinyin values look like "10610.080:mǒu" — strip the location prefix.
      pinyin: (char.kHanyuPinyin || '').replace(/^[0-9.]+:/, ''),
      definitions: char.kDefinition ? char.kDefinition.split(/;\s*/) : [],
    }]
  };

  // Attach stroke animation data when available (best-effort).
  try {
    // BUGFIX: declare `strokes` locally — it was previously assigned without
    // a declaration, creating an implicit global (a throw in strict mode).
    const strokes = require('hanzi-writer-data/' + char.character);
    if (strokes) {
      response.strokeData = {};
      response.strokeData[`${char.character}`] = strokes;
    }
  } catch(e) {}

  let variants = [];

  // Extract "U+XXXX" code points from a Unihan variant field, e.g.
  // 'U+6C35<kMatthews,U+6C35<kMatthews', and collect the characters.
  function parseVariants(input) {
    if (!input) { return; }
    // BUGFIX: String#match returns null when nothing matches; guard it.
    const matches = input.match(/(U[+][0-9A-F]+)/g) || [];
    matches.forEach((match) => {
      // BUGFIX: use fromCodePoint (not fromCharCode) so characters outside
      // the BMP (e.g. CJK Extension B, U+20000+) are decoded correctly.
      variants.push(String.fromCodePoint(parseInt(match.replace('U+', ''), 16)));
    });
  }

  [
    'kSemanticVariant',
    'kSimplifiedVariant',
    'kTraditionalVariant',
    'kZVariant',
    'kCompatibilityVariant',
    'kSpecializedSemanticVariant'
  ].forEach((key) => {
    if (char[key]) {
      parseVariants(char[key]);
    }
  });

  if (variants.length) {
    response.variants = _.uniq(variants);
  }

  return response
}
\ No newline at end of file
module.exports = {
friendlyName: 'Get a HSK vocabulary list',
description: '',
inputs: {
level: {
type: 'number',
required: true,
isInteger: true,
min: 1,
max: 6
}
},
fn: async function(inputs) {
const words = await Word.find({ hskLevel: inputs.level })
.select(['simplified', 'traditional', 'pinyin', 'definitions'])
.sort(['frequency DESC', 'pinyin ASC'])
return { words }
}
}
\ No newline at end of file
const _ = require('lodash');
module.exports = {
friendlyName: 'Get a Chinese radical and characters containing it',
description: '',
todo: 'Use Word model rather than Sequelize thingy',
inputs: {
radical: {
type: 'string',
required: true
}
},
fn: async function(inputs) {
const radicals = require('../../data/radicals');
let radical = inputs.radical;
let info = radicals.filter((r) => { return r.radical === radical || r.simplified === radical || r.variants.match(radical) }).shift();
if (!info) {
return res.notFound('Radical not found')
}
const results = await Word.find({
where: {
or: [
{ radicalSimplified: radical },
{ radicalTraditional: radical }
]
}
}).select(['traditional', 'simplified', 'pinyin', 'definitions', 'radicalSimplified', 'radicalTraditional', 'strokesMinRadSimplified', 'strokesMinRadTraditional'])
let chars = {};
results.forEach((r) => {
if (r.radicalSimplified == radical) {
if (!chars[ r.strokesMinRadSimplified ]) { chars[ r.strokesMinRadSimplified ] = []; }
chars[ r.strokesMinRadSimplified ].push({
char: r.simplified,
pinyin: r.pinyin,
definitions: r.definitions
})
}
if (r.radicalTraditional == radical) {
if (!chars[ r.strokesMinRadTraditional ]) { chars[ r.strokesMinRadTraditional ] = []; }
chars[ r.strokesMinRadTraditional ].push({
char: r.traditional,
pinyin: r.pinyin,
definitions: r.definitions
})
}
});
// remove inevitable duplicates. #shitpile
Object.keys(chars).forEach((key) => {
chars[key] = _.uniqWith(chars[key], (a, b) => {
return a.char === b.char && (parseInt(a.pinyin.substr(-1)) || 5) === (parseInt(b.pinyin.substr(-1)) || 5);
});
});
return { radical: info, characters: chars };
}
}
\ No newline at end of file
module.exports = {
friendlyName: 'Get example sentences for a given word',
description: '',
inputs: {
word: {
type: 'string',
required: true
}
},
fn: async function(inputs) {
const { req, res } = this
const word = inputs.word.trim().replace(/[%_'"]/g, '')
if (!word) {
return res.badRequest('Invalid/missing query word')
}
const qyWord = `%${word}%`
const sentences = await ExampleSentence.find({
where: {
or: [
{ simplified: { like: qyWord } },
{ traditional: { like: qyWord } },
]
}
})
.select([ 'id', 'simplified', 'traditional', 'pinyin', 'translations', 'audio' ])
.limit(40) // 的 returns > 400 results!
// Munge and tokenify
sentences.forEach((s) => {
// s.audio = (req.secure ? 'https://' : 'http://') + req.headers.host + '/audio/' + s.audio;
s.tokens = { simplified: [], traditional: [] }
for (let t of sails.helpers.zhdict.tokenizer.tokenize(s.traditional)) {
s.tokens.simplified.push({ l: !!t.pinyin, w: t.simplified })
s.tokens.traditional.push({ l: !!t.pinyin, w: t.traditional })
}
});
return { sentences }
}
}
\ No newline at end of file
module.exports = {
friendlyName: 'Get a list of Chinese radicals',
description: '',
fn: async function(inputs) {
const radicals = require('../../data/radicals');
return {
radicals: radicals.map((r) => { r.traditional = r.radical; return r; })
}
}
}
\ No newline at end of file
const chineseConv = require('chinese-conv');
// const corenlp = require('../../lib/helpers/corenlp');
module.exports = {
friendlyName: 'Search the dictionary',
description: '',
inputs: {
q: {
type: 'string',
required: true,
minLength: 1,
maxLength: 100,
},
page: {
type: 'number',
isInteger: true,
defaultsTo: 1,
min: 1,
max: 99999,
}
},
fn: async function(inputs) {
const { req } = this
const { q, page } = inputs
const limit = 20
if (page < 1) { page = 1 }
let offset = page * limit - limit
let searchSql = `SELECT
traditional,
simplified,
pinyin,
definitions,
"hskLevel",
frequency,
pgroonga_score(tableoid, ctid) AS score,
CASE WHEN "hskLevel" = 6 THEN 1
WHEN "hskLevel" = 5 THEN 2
WHEN "hskLevel" = 4 THEN 3
WHEN "hskLevel" = 3 THEN 4
WHEN "hskLevel" = 2 THEN 5
WHEN "hskLevel" = 1 THEN 6
WHEN "hskLevel" is null THEN 0.00001
END,
((pgroonga_score(tableoid, ctid) + (w.frequency/5) + ("hskLevel" / 6)) /2) AS "orderScore"
FROM word w
WHERE traditional &@ $1
OR simplified &@ $1
OR definitions &@ $1
OR pronunciation &@ $1
ORDER BY "orderScore" DESC
NULLS LAST
OFFSET $3
LIMIT $2`
let countSql = `SELECT COUNT(1) AS count FROM word w
WHERE traditional &@ $1
OR simplified &@ $1
OR definitions &@ $1
OR pronunciation &@ $1`
let results
let totalResults
try {
results = (await sails.sendNativeQuery(searchSql, [ q, limit, offset ])).rows
totalResults = (await sails.sendNativeQuery(countSql, [ q ])).rows[0].count
} catch (err) {
console.error(`Unable to perform dictionary search for ${q}`, err.stack)
return res.status(500).json('Unable to perform search')
}
// Fall back to splitting a potentially long input to find chinesey words
if (!results || !results.length) {
return tryTokenizedSearch(this.req, q)
}
DictionarySearchLog.create({
query: q,
user: req.me ? req.me.id : null,
ip: req.ip,
totalResults: totalResults
}, (err, res) => { /* Do this 'in the background'... */ })
// console.log(results)
// results = results.sort((a,b) => {
// return b.sortScore - a.sortScore
// })
return {
total: totalResults,
pages: Math.ceil(totalResults / limit),
results: results
}
}
}
// Fallback search: tokenize the (possibly long) query and return any tokens
// that resolve to real dictionary entries.
function tryTokenizedSearch(req, query) {
  // Normalize to traditional script so the tokenizer's dictionary matches.
  const normalized = chineseConv.tify(query);

  // Keep only tokens that resolved to actual dictionary entries.
  const tokens = app.tokenizer_t.tokenize(normalized)
    .filter((t) => t.pinyin && t.english);

  // Log the search without blocking the response.
  DictionarySearchLog.create({
    query: normalized,
    user: req.me ? req.me.id : null,
    ip: req.ip,
    totalResults: tokens.length
  }, (err, res) => { /* Do this 'in the background'... */ })

  if (tokens.length === 0) {
    return { total: 0, pages: 0, results: [] }
  }

  // Reshape tokens to match the regular search-result schema.
  const results = tokens.map((t) => {
    t.definitions = t.english.split('/');
    t.pinyin = t.pinyin.replace(/[/]/g, ' / ');
    delete t.english;
    return t;
  });

  return {
    total: results.length,
    pages: 1,
    results: results
  }
}
// TODO: This uses CoreNLP to tokenize the string, then the old node/cedict
// tokenizer to split up any tokenized-but-not-found-in-cedict-words into
// their smaller components. Not sure if CoreNLP will handle heavy traffic
// well, so for now just using the old method `tryTokenizedSearch()`
/*
async function tryTokenizedSearchNg(req, res, next) {
let query = chineseConv.sify(req.inputs.q);
let parsed = await corenlp.tokenize(query);
let tokens = [];
let ogTokens = [];
if (parsed && parsed.sentences) {
parsed.sentences.forEach((s) => {
if (s.tokens) {
s.tokens.forEach((t) => {
ogTokens.push(t)
tokens.push(
...app.tokenizer_t.tokenize(chineseConv.tify(t.word), true).map((s) => {
if (s.pinyin) s.pinyin.replace(/[/]/g, ' / ');
s.definitions = s.english ? s.english.split('/') : [];
delete s.english;
return s;
})
)
})
}
})
}
return res.status(419).json({
ogTokens,
tokens,
});
}
*/
/*
* corenlp helper
*/
/*
const tokenizer_t = require("@zhdict/tokenizer")(require('path').join(__dirname, '../../data/cedict_ts.u8'), 'traditional');
const request = require('request');
const querystring = require('querystring');