-
Notifications
You must be signed in to change notification settings - Fork 7
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Initial migration from parent, liblevenshtein project
- Loading branch information
Dylon Edwards
committed
Mar 29, 2014
0 parents
commit a149d5a
Showing
19 changed files
with
2,145 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
*.sw? | ||
wiki | ||
.gradle/* | ||
node_modules/* | ||
build/* | ||
lib/* | ||
coverage/* | ||
docs/* | ||
.sass-cache/* | ||
npm-debug.log |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,99 @@ | ||
require 'coffee-script/register' #-> Register the .coffee extension | ||
wrench = require 'wrench' | ||
|
||
fs = require 'fs' | ||
{print} = require 'sys' | ||
{spawn, exec} = require 'child_process' | ||
|
||
# Compiles the CoffeeScript sources under src/ into build/ and, once the
# compiler has exited successfully, concatenates the compiled files into the
# distributable bundles.
#
# watch    - Optional boolean; when true the compiler is started with `-w`.
#            May be omitted, in which case the first argument is the callback.
# callback - Optional function invoked after every bundle has been written
#            and its file descriptor closed.
#
# An Error is thrown from the compiler's 'exit' handler when it exits with a
# non-zero status.
build = (watch, callback) ->
  if typeof watch is 'function'
    callback = watch
    watch = false
  options = ['-c', '-o', 'build', 'src']
  options.unshift '-w' if watch

  coffee = spawn "#{__dirname}/node_modules/coffee-script/bin/coffee", options
  coffee.stdout.on 'data', (data) -> print data.toString()
  coffee.stderr.on 'data', (data) -> print data.toString()
  coffee.on 'exit', (status) ->
    throw new Error("An unexpected error occurred") if status isnt 0

    # Counts outstanding bundle writes; the callback fires when it hits zero.
    countdown =
      count: 0
      increment: () -> ++ @count
      decrement: () -> @callback() if 0 is (-- @count) and @callback
      callback: callback

    # Files produced by this function (plus their minified siblings). They
    # live in build/ next to the compiled sources, so they must be skipped when
    # collecting inputs; otherwise a rebuild without cleaning would fold stale
    # bundles, and the bundle currently being written, into the new bundle.
    bundle_paths = [
      'build/liblevenshtein.js'
      'build/levenshtein-transducer.js'
      'build/levenshtein-distance.js'
    ]
    is_generated = (path) -> path in bundle_paths or /\.min\.js$/.test(path)

    # Guard count: keeps the callback from firing until every concat_files call
    # below has been issued. Released by the final countdown.decrement().
    countdown.increment()

    # Writes a license header followed by the contents of `files` to `path`.
    # NOTE(review): '../LICENSE' is resolved against the process working
    # directory, not this file's directory — confirm the file exists there.
    concat_files = (path, files) ->
      fs.open path, 'w', null, (error, fd) ->
        throw error if error

        fs.writeSync fd, '/**\n'
        fs.writeSync fd, '@license\n'
        fs.writeSync fd, fs.readFileSync('../LICENSE', 'utf8')
        fs.writeSync fd, '\n'
        fs.writeSync fd, '*/\n'

        for file in files
          fs.writeSync fd, fs.readFileSync(file, 'utf8')
          fs.writeSync fd, '\n'

        # Only report completion once the descriptor is really closed, so a
        # close failure is raised before the callback runs rather than after.
        fs.close fd, (error) ->
          throw error if error
          countdown.decrement()

    countdown.increment()
    concat_files 'build/liblevenshtein.js', do ->
      lib_files = []
      for file in wrench.readdirSyncRecursive('build')
        path = "build/#{file}"
        lib_files.push(path) if /\.js$/.test(file) and not is_generated(path)
      lib_files

    countdown.increment()
    concat_files 'build/levenshtein-transducer.js', [
      'build/collection/dawg.js'
      'build/collection/max-heap.js'
      'build/levenshtein/transducer.js'
      'build/levenshtein/builder.js'
    ]

    countdown.increment()
    concat_files 'build/levenshtein-distance.js', [
      'build/levenshtein/distance.js'
    ]

    # Release the guard count taken above.
    countdown.decrement()
|
||
# `cake docs`: runs the locally installed Docco binary over every .coffee file
# found (recursively) under src/, echoing Docco's output.
# An Error is thrown from the 'exit' handler if Docco exits with a non-zero
# status, mirroring how `build` reports compiler failures.
task 'docs', 'Generate annotated source code with Docco', ->
  src_files = []
  for file in wrench.readdirSyncRecursive('src')
    path = "src/#{file}"
    src_files.push(path) if /\.coffee$/.test(path)
  docco = spawn "#{__dirname}/node_modules/docco/bin/docco", src_files
  docco.stdout.on 'data', (data) -> print data.toString()
  docco.stderr.on 'data', (data) -> print data.toString()
  # The task receives no completion callback, so there is nothing to invoke on
  # success; only failures need to be surfaced.
  docco.on 'exit', (status) ->
    throw new Error("docco exited with status #{status}") if status isnt 0
|
||
# `cake build`: a single compile-and-bundle run with no completion callback.
task 'build', 'Compile CoffeeScript source files', -> build()
|
||
# `cake minify`: builds the bundles, then runs the Gradle `minify` task,
# echoing its output. `gradle` is looked up on the PATH.
# An Error is thrown from the 'exit' handler if Gradle exits with a non-zero
# status, so a failed minification is not mistaken for success.
task 'minify', 'Builds and minifies liblevenshtein.js', ->
  build ->
    closure = spawn 'gradle', ['minify']
    closure.stdout.on 'data', (data) -> print data.toString()
    closure.stderr.on 'data', (data) -> print data.toString()
    closure.on 'exit', (status) ->
      throw new Error("gradle minify exited with status #{status}") if status isnt 0
|
||
# `cake watch`: starts the build with the compiler's `-w` flag enabled.
task 'watch', 'Recompile CoffeeScript source files when modified', -> build(true)
|
||
# `cake test`: rebuilds, then hands nodeunit's default reporter the sorted list
# made of 'test' plus every directory found (recursively) beneath it.
task 'test', 'Run the test suite', ->
  build ->
    nodeunit = require 'nodeunit'
    process.chdir __dirname

    # lstat is used, so symbolic links are not followed when classifying entries.
    is_directory = (candidate) -> fs.lstatSync(candidate).isDirectory()

    suites = ['test']
    for entry in wrench.readdirSyncRecursive('test')
      candidate = "test/#{entry}"
      continue unless is_directory(candidate)
      suites.push(candidate)

    nodeunit.reporters.default.run suites.sort()
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
// The Java plugin supplies sourceSets.main.runtimeClasspath, which the
// JavaExec tasks in this script use as their classpath.
apply(plugin: 'java')

repositories {
  mavenCentral()
}

// Closure Compiler is needed on the runtime classpath only.
dependencies {
  runtime group: 'com.google.javascript', name: 'closure-compiler', version: 'v20131014'
}
|
||
// Bundles to minify. Each entry NAME is read from build/NAME.js and written to
// build/NAME.min.js by a task called minify_NAME (with '-' replaced by '_'),
// so the task names are minify_liblevenshtein, minify_levenshtein_transducer
// and minify_levenshtein_distance.
def minifyBundles = [
  'liblevenshtein',
  'levenshtein-transducer',
  'levenshtein-distance'
]

// Maps a bundle name to the name of the task that minifies it.
def minifyTaskName = { String bundle -> 'minify_' + bundle.replace('-', '_') }

minifyBundles.each { String bundle ->
  task(minifyTaskName(bundle), type: JavaExec) {
    main = 'com.google.javascript.jscomp.CommandLineRunner'
    classpath = sourceSets.main.runtimeClasspath
    args = [
      '--compilation_level', 'ADVANCED_OPTIMIZATIONS',
      //'--output_wrapper', '(function(){"use strict";%output%}());',
      '--js_output_file', 'build/' + bundle + '.min.js',
      '--js', 'build/' + bundle + '.js'
    ]
  }
}

// Aggregate task: minifies every bundle listed above.
task minify(dependsOn: minifyBundles.collect { minifyTaskName(it) })
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
{ | ||
"name": "liblevenshtein", | ||
"description": "Various utilities regarding Levenshtein transducers.", | ||
"author": "Dylon Edwards", | ||
"version": "2.0.1", | ||
"licenses": [ | ||
{ | ||
"type": "MIT", | ||
"url": "https://github.com/dylon/liblevenshtein/raw/master/LICENSE" | ||
} | ||
], | ||
"repository": { | ||
"type": "git", | ||
"url": "https://github.com/dylon/liblevenshtein.git" | ||
}, | ||
"main": null, | ||
"devDependencies": { | ||
"nodeunit": "~0.8.6", | ||
"coffee-script": "~1.7.1", | ||
"wrench": "~1.5.8", | ||
"docco": "~0.6.3", | ||
"seed-random": "~2.2.0" | ||
}, | ||
"engines": | ||
{ | ||
"node": "~0.10" | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,127 @@ | ||
# ============================================================================ | ||
# Taken and modified for my purposes from the following source: | ||
# o http://stevehanov.ca/blog/index.php?id=115 | ||
# ============================================================================ | ||
# | ||
# This class represents a node in the directed acyclic word graph (DAWG, | ||
# a.k.a. Minimal Acyclic Finite State Automaton, or MA-FSA). It has a list | ||
# of edges to other nodes. It has functions for testing whether it is | ||
# equivalent to another node. Nodes are equivalent if they have identical | ||
# edges, and each identical edge leads to identical states. | ||
class DawgNode
  # Identifier handed to the next node created; incremented on every
  # construction so each node receives a distinct id.
  @next_id = 0

  # Creates a non-final node with no outgoing edges.
  # NOTE(review): 'is_final' and 'edges' are accessed with quoted keys
  # throughout, presumably so that Closure Compiler's ADVANCED_OPTIMIZATIONS
  # does not rename them — confirm before switching to dot access.
  constructor: ->
    @id = DawgNode.next_id; DawgNode.next_id += 1
    @['is_final'] = false
    @['edges'] = {}

  # Binary search over the sorted array `edges` within [lower, upper).
  # Returns the leftmost index at which `edge` can be inserted while keeping
  # the array sorted.
  bisect_left: (edges, edge, lower, upper) ->
    while lower < upper
      i = (lower + upper) >> 1
      if edges[i] < edge
        lower = i + 1
      else
        upper = i
    return lower

  # Returns a signature string built from the node's finality (0 or 1) and, in
  # sorted order, each outgoing label paired with the id of the node it leads
  # to. Two nodes share a signature exactly when they have the same finality
  # and the same labels leading to the same child ids.
  #
  # Each label is JSON-quoted so that it is delimited from the id that follows.
  # With plain concatenation, digit labels made distinct nodes collide: edges
  # {'1' -> #2, '3' -> #4} and {'1' -> #234} both produced "01234".
  'toString': ->
    edges = []
    for label, node of @['edges'] # insertion sort
      edge = JSON.stringify(label) + node.id.toString()
      edges.splice(@bisect_left(edges, edge, 0, edges.length), 0, edge)
    (+ @['is_final']) + edges.join('')
|
||
class Dawg
  # Builds the graph by inserting every element of the array-like `dictionary`
  # in order and then minimizing whatever is still pending.
  # Throws an Error when `dictionary` is falsy or has no numeric `length`.
  # NOTE(review): the incremental construction appears to rely on the words
  # arriving in sorted order; nothing here sorts or checks them — confirm
  # against callers.
  constructor: (dictionary) ->
    if not dictionary or typeof dictionary.length isnt 'number'
      throw new Error("Expected dictionary to be array-like")

    @previous_word = ''
    @['root'] = new DawgNode()

    # [parent, label, child] transitions whose child has not yet been checked
    # for duplication.
    @unchecked_nodes = []

    # Signature -> node, for nodes that have already been checked.
    @minimized_nodes = {}

    for word in dictionary
      @['insert'](word)
    @finish()

  # Adds `word` to the graph, reusing the path it shares with the previously
  # inserted word.
  'insert': (word) ->
    previous = @previous_word

    # Length of the longest common prefix of `word` and the previous word.
    limit = if previous.length > word.length then word.length else previous.length
    shared = 0
    while shared < limit and word[shared] is previous[shared]
      shared += 1

    # Minimize the pending transitions beyond the shared prefix; the pending
    # list is truncated to that prefix as a result.
    @minimize(shared)
    pending = @unchecked_nodes

    # Resume from the deepest node still pending, or from the root.
    tail = pending[pending.length - 1]
    node = if tail? then tail[2] else @['root']

    # Append one fresh node per remaining character of the word.
    index = shared
    loop
      character = word[index]
      break if character is undefined
      child = new DawgNode()
      node['edges'][character] = child
      pending.push([node, character, child])
      node = child
      index += 1

    node['is_final'] = true
    @previous_word = word
    return

  # Minimizes every transition that is still pending.
  finish: ->
    @minimize(0)
    return

  # Pops pending transitions, deepest first, until only `lower_bound` remain.
  # Each popped child is either replaced by an already-registered node with the
  # same signature or registered itself.
  minimize: (lower_bound) ->
    registry = @minimized_nodes
    pending = @unchecked_nodes

    excess = pending.length - lower_bound
    while excess > 0
      [parent, character, child] = pending.pop()
      signature = child.toString()
      if signature of registry
        # A node with this signature already exists: point the parent at it.
        parent['edges'][character] = registry[signature]
      else
        # First node with this signature: register it.
        registry[signature] = child
      excess -= 1
    return

  # Follows `word` from the root one element at a time. Returns false as soon
  # as an edge is missing, otherwise the `is_final` flag of the node reached.
  'accepts': (word) ->
    node = @['root']
    for character in word
      return false unless node = node['edges'][character]
    node['is_final']
|
||
# Choose the object that receives the public API: `exports` when it is an
# object, otherwise `window` when that is an object, otherwise `this`.
global = switch 'object'
  when typeof exports then exports
  when typeof window then window
  else this

# Create the `levenshtein` namespace only if it is not already present, then
# publish both classes on it.
global['levenshtein'] = {} unless global['levenshtein']
namespace = global['levenshtein']
namespace['DawgNode'] = DawgNode
namespace['Dawg'] = Dawg
|
Oops, something went wrong.