Skip to content

Commit

Permalink
Initial migration from parent, liblevenshtein project
Browse files Browse the repository at this point in the history
  • Loading branch information
Dylon Edwards committed Mar 29, 2014
0 parents commit a149d5a
Show file tree
Hide file tree
Showing 19 changed files with 2,145 additions and 0 deletions.
10 changes: 10 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
*.sw?
wiki
.gradle/*
node_modules/*
build/*
lib/*
coverage/*
docs/*
.sass-cache/*
npm-debug.log
99 changes: 99 additions & 0 deletions Cakefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
require 'coffee-script/register' #-> Register the .coffee extension
wrench = require 'wrench'

fs = require 'fs'
{print} = require 'sys'
{spawn, exec} = require 'child_process'

# Compiles the CoffeeScript sources under src/ into build/ and, once the
# compiler exits successfully, concatenates the compiled files into the
# distributable bundles.
#
# watch    - when truthy, '-w' is passed to the coffee compiler. May be omitted,
#            in which case the first argument is treated as the callback.
# callback - optional function invoked after every bundle has been written.
#
# Throws an Error (from the compiler's 'exit' handler) if coffee exits with a
# non-zero status.
build = (watch, callback) ->
  # Support the build(callback) call form.
  if typeof watch is 'function'
    callback = watch
    watch = false
  options = ['-c', '-o', 'build', 'src']
  options.unshift '-w' if watch

  coffee = spawn "#{__dirname}/node_modules/coffee-script/bin/coffee", options
  coffee.stdout.on 'data', (data) -> print data.toString()
  coffee.stderr.on 'data', (data) -> print data.toString()
  coffee.on 'exit', (status) ->
    throw new Error("An unexpected error occurred") if status isnt 0

    # Counts outstanding asynchronous bundle writes; the callback fires when
    # the count drops back to zero.
    countdown =
      count: 0
      increment: () -> ++ @count
      decrement: () -> @callback() if 0 is (-- @count) and @callback
      callback: callback

    # Files this function (or the gradle minify tasks) generate inside build/.
    # They must never be fed back into the catch-all bundle, otherwise a
    # rebuild would embed stale bundles, minified output, and the bundle that
    # is currently being written.
    bundle_paths = [
      'build/liblevenshtein.js'
      'build/levenshtein-transducer.js'
      'build/levenshtein-distance.js'
    ]
    is_generated = (path) ->
      path in bundle_paths or /\.min\.js$/.test(path)

    # Hold the count above zero until every bundle has been scheduled, so the
    # callback cannot fire early.
    countdown.increment()
    concat_files = (path, files) ->
      fs.open path, 'w', null, (error, fd) ->
        throw error if error

        # License banner.
        # NOTE(review): '../LICENSE' is resolved against the process working
        # directory, not this file -- confirm it points at the intended file.
        fs.writeSync fd, '/**\n'
        fs.writeSync fd, '@license\n'
        fs.writeSync fd, fs.readFileSync('../LICENSE', 'utf8')
        fs.writeSync fd, '\n'
        fs.writeSync fd, '*/\n'

        for file in files
          fs.writeSync fd, fs.readFileSync(file, 'utf8')
          fs.writeSync fd, '\n'

        fs.close fd, (error) -> throw error if error
        countdown.decrement()

    # Everything that was compiled, excluding generated bundles.
    countdown.increment()
    concat_files 'build/liblevenshtein.js', do ->
      lib_files = []
      for file in wrench.readdirSyncRecursive('build')
        path = "build/#{file}"
        lib_files.push(path) if /\.js$/.test(file) and not is_generated(path)
      lib_files

    countdown.increment()
    concat_files 'build/levenshtein-transducer.js', [
      'build/collection/dawg.js'
      'build/collection/max-heap.js'
      'build/levenshtein/transducer.js'
      'build/levenshtein/builder.js'
    ]

    countdown.increment()
    concat_files 'build/levenshtein-distance.js', [
      'build/levenshtein/distance.js'
    ]

    # Release the guard taken before the bundles were scheduled.
    countdown.decrement()

# cake docs: runs Docco over every .coffee file found under src/.
# Throws if docco exits with a non-zero status so that a failed documentation
# run is not silently ignored.
task 'docs', 'Generate annotated source code with Docco', ->
  src_files = []
  for file in wrench.readdirSyncRecursive('src')
    path = "src/#{file}"
    src_files.push(path) if /\.coffee$/.test(path)
  docco = spawn "#{__dirname}/node_modules/docco/bin/docco", src_files
  docco.stdout.on 'data', (data) -> print data.toString()
  docco.stderr.on 'data', (data) -> print data.toString()
  # No callback exists in this scope, so there is nothing to invoke on
  # success; only failures need handling.
  docco.on 'exit', (status) ->
    throw new Error("docco exited with status #{status}") if status isnt 0

# cake build: compile once and assemble the bundles.
task 'build', 'Compile CoffeeScript source files', -> build()

# cake minify: build first, then hand the bundles to the gradle 'minify' task,
# echoing everything gradle prints.
task 'minify', 'Builds and minifies liblevenshtein.js', ->
  build ->
    gradle = spawn 'gradle', ['minify']
    echo = (chunk) -> print chunk.toString()
    gradle.stdout.on 'data', echo
    gradle.stderr.on 'data', echo

# cake watch: same as build, but with the watch flag set.
task 'watch', 'Recompile CoffeeScript source files when modified', -> build true

# cake test: build, then run nodeunit's default reporter over test/ and every
# directory nested below it, in sorted order.
task 'test', 'Run the test suite', ->
  build ->
    nodeunit = require 'nodeunit'
    # Relative test paths below are resolved against the project root.
    process.chdir __dirname
    is_directory = (path) -> fs.lstatSync(path).isDirectory()
    nested = ("test/#{entry}" for entry in wrench.readdirSyncRecursive('test'))
    suites = ['test'].concat(nested.filter(is_directory))
    nodeunit.reporters.default.run suites.sort()

48 changes: 48 additions & 0 deletions build.gradle
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
// The Java plugin supplies sourceSets.main.runtimeClasspath, which the
// JavaExec tasks in this script use as their classpath.
apply plugin: 'java'

// Dependencies are resolved from Maven Central.
repositories {
mavenCentral()
}

// Closure Compiler is needed only when the minify tasks execute, hence the
// 'runtime' configuration.
dependencies {
runtime 'com.google.javascript:closure-compiler:v20131014'
}

// Base names (without extension) of the bundles in build/ that get minified.
def bundles = [
    'liblevenshtein',
    'levenshtein-transducer',
    'levenshtein-distance'
]

// Registers one JavaExec task per bundle, named minify_<bundle> with '-'
// replaced by '_'. Each runs Closure Compiler's command-line runner on
// build/<bundle>.js and writes build/<bundle>.min.js.
def minifyTaskNames = bundles.collect { bundle ->
    def taskName = 'minify_' + bundle.replace('-', '_')
    task(taskName, type: JavaExec) {
        main = 'com.google.javascript.jscomp.CommandLineRunner'
        classpath = sourceSets.main.runtimeClasspath
        args = [
            '--compilation_level', 'ADVANCED_OPTIMIZATIONS',
            //'--output_wrapper', '(function(){"use strict";%output%}());',
            '--js_output_file', 'build/' + bundle + '.min.js',
            '--js', 'build/' + bundle + '.js'
        ]
    }
    taskName
}

// Aggregate task: minifies every bundle.
task minify(dependsOn: minifyTaskNames)
28 changes: 28 additions & 0 deletions package.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
{
"name": "liblevenshtein",
"description": "Various utilities regarding Levenshtein transducers.",
"author": "Dylon Edwards",
"version": "2.0.1",
"licenses": [
{
"type": "MIT",
"url": "https://github.com/dylon/liblevenshtein/raw/master/LICENSE"
}
],
"repository": {
"type": "git",
"url": "https://github.com/dylon/liblevenshtein.git"
},
"main": null,
"devDependencies": {
"nodeunit": "~0.8.6",
"coffee-script": "~1.7.1",
"wrench": "~1.5.8",
"docco": "~0.6.3",
"seed-random": "~2.2.0"
},
"engines":
{
"node": "~0.10"
}
}
127 changes: 127 additions & 0 deletions src/collection/dawg.coffee
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
# ============================================================================
# Taken and modified for my purposes from the following source:
# o http://stevehanov.ca/blog/index.php?id=115
# ============================================================================
#
# This class represents a node in the directed acyclic word graph (DAWG,
# a.k.a. Minimal Acyclic Finite State Automaton, or MA-FSA). It has a list
# of edges to other nodes. It has functions for testing whether it is
# equivalent to another node. Nodes are equivalent if they have identical
# edges, and each identical edge leads to identical states.
class DawgNode
  # Source of the sequential ids handed out by the constructor.
  @next_id = 0

  # Creates a non-final node with no outgoing edges and a fresh id.
  constructor: ->
    @id = DawgNode.next_id; DawgNode.next_id += 1
    @['is_final'] = false
    @['edges'] = {}

  # Returns the leftmost index in edges[lower...upper] at which edge can be
  # inserted while keeping that (already sorted) range sorted.
  bisect_left: (edges, edge, lower, upper) ->
    while lower < upper
      i = (lower + upper) >> 1
      if edges[i] < edge
        lower = i + 1
      else
        upper = i
    return lower

  # Returns a signature for this node: '0' or '1' for is_final, followed by one
  # "<label><target id>," entry per outgoing edge, in sorted order.
  #
  # Every entry is terminated with ',' so that the end of a target id is
  # unambiguous. Without a terminator, different edge sets can produce the
  # same string when labels are digits (e.g. '1' -> node 234 versus
  # '1' -> node 2 plus '3' -> node 4 would both read "1234").
  # Assumes each label is a single character.
  'toString': ->
    edges = []
    for label, node of @['edges'] # insertion sort
      edge = label + node.id.toString() + ','
      edges.splice(@bisect_left(edges, edge, 0, edges.length), 0, edge)
    (+ @['is_final']) + edges.join('')

class Dawg
  # Builds the graph from dictionary, an array-like collection of words, by
  # inserting each word in turn and then minimizing whatever is left.
  #
  # Words that share a prefix must be adjacent (as they are in a sorted
  # dictionary); see insert.
  #
  # Throws an Error if dictionary is falsy or has no numeric length.
  constructor: (dictionary) ->
    unless dictionary and typeof dictionary.length is 'number'
      throw new Error("Expected dictionary to be array-like")

    @previous_word = ''
    @['root'] = new DawgNode()

    # [parent, character, child] triples along the path of the previous word
    # that have not been checked for duplication yet.
    @unchecked_nodes = []

    # Unique nodes that have been checked for duplication, keyed by signature.
    @minimized_nodes = {}

    @['insert'](word) for word in dictionary
    @finish()

  # Adds word to the graph.
  #
  # Throws an Error, leaving the graph untouched, if adding the word would
  # replace an existing outgoing edge. That happens when words sharing a
  # prefix are not inserted next to each other, and previously it silently
  # discarded the words reachable through the replaced edge.
  'insert': (word) ->
    # Find longest common prefix between word and previous word
    i = 0; previous_word = @previous_word

    upper_bound =
      if word.length < previous_word.length
        word.length
      else
        previous_word.length

    i += 1 while i < upper_bound and word[i] is previous_word[i]

    unchecked_nodes = @unchecked_nodes

    # The node reached by the common prefix. unchecked_nodes holds one entry
    # per character of the previous word, so entry i - 1 ends at depth i.
    node =
      if i is 0
        @['root']
      else
        unchecked_nodes[i - 1][2]

    # Only the first new character can collide: every node created below is
    # fresh and has no edges. Checked before minimize so that a rejected word
    # does not alter any state.
    first = word[i]
    if first isnt `undefined` and Object::hasOwnProperty.call(node['edges'], first)
      throw new Error(
        "Cannot insert '#{word}': words sharing a prefix must be inserted " +
        "consecutively (e.g. in sorted order)")

    # Check the unchecked_nodes for redundant nodes, proceeding from last one
    # down to the common prefix size. Then truncate the list at that point.
    @minimize(i)

    # Add the suffix, starting from the node reached by the common prefix.
    while (character = word[i]) isnt `undefined`
      next_node = new DawgNode()
      node['edges'][character] = next_node
      unchecked_nodes.push([node, character, next_node])
      node = next_node
      i += 1

    node['is_final'] = true
    @previous_word = word
    return

  # Minimizes all remaining unchecked nodes.
  finish: ->
    @minimize(0)
    # unchecked_nodes is empty now, so there is no path left to share a prefix
    # with; forget the previous word so a later insert starts from the root.
    @previous_word = ''
    return

  # Pops unchecked nodes, deepest first, until only lower_bound remain. Each
  # popped child is either replaced in its parent by a previously seen node
  # with the same signature, or recorded as the representative of that
  # signature.
  minimize: (lower_bound) ->
    # proceed from the leaf up to a certain point
    minimized_nodes = @minimized_nodes
    unchecked_nodes = @unchecked_nodes

    j = unchecked_nodes.length
    while j > lower_bound
      [parent, character, child] = unchecked_nodes.pop()
      child_key = child.toString()
      if child_key of minimized_nodes
        # replace the child with the previously encountered one
        parent['edges'][character] = minimized_nodes[child_key]
      else
        # add the state to the minimized nodes
        minimized_nodes[child_key] = child
      j -= 1
    return

  # Returns true when following word's characters from the root ends on a
  # final node; false as soon as an edge is missing.
  'accepts': (word) ->
    node = @['root']
    for edge in word
      node = node['edges'][edge]
      return false unless node
    node['is_final']

# Choose the object to publish on: `exports` when it is an object, otherwise
# `window` when it is an object, otherwise `this`. Later assignments take
# precedence, so the highest-priority candidate is tested last.
export_target = this
export_target = window if typeof window is 'object'
export_target = exports if typeof exports is 'object'

# Reuse an existing 'levenshtein' namespace or create it, then publish the
# two classes on it.
namespace = (export_target['levenshtein'] ||= {})
namespace['DawgNode'] = DawgNode
namespace['Dawg'] = Dawg

Loading

0 comments on commit a149d5a

Please sign in to comment.