Skip to content

Commit

Permalink
Updated Rabin-Karp search to use rolling hash module
Browse files Browse the repository at this point in the history
  • Loading branch information
Bruce-Feldman committed Jul 24, 2018
1 parent f9c6a0e commit 11367f2
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 77 deletions.
12 changes: 2 additions & 10 deletions src/algorithms/string/rabin-karp/__test__/rabinKarp.test.js
Original file line number Diff line number Diff line change
@@ -1,19 +1,11 @@
import { rabinKarp, hashWord, reHashWord } from '../rabinKarp';
import rabinKarp from '../rabinKarp';

describe('rabinKarp', () => {
it('should correctly calculates hash and re-hash', () => {
expect(hashWord('a')).toBe(97);
expect(hashWord('b')).toBe(98);
expect(hashWord('abc')).toBe(941094);
expect(hashWord('bcd')).toBe(950601);
expect(reHashWord(hashWord('abc'), 'abc', 'bcd')).toBe(950601);
expect(reHashWord(hashWord('abc'), 'abc', 'bcd')).toBe(hashWord('bcd'));
});

it('should find substring in a string', () => {
expect(rabinKarp('', '')).toBe(0);
expect(rabinKarp('a', '')).toBe(0);
expect(rabinKarp('a', 'a')).toBe(0);
expect(rabinKarp('ab', 'b')).toBe(1);
expect(rabinKarp('abcbcglx', 'abca')).toBe(-1);
expect(rabinKarp('abcbcglx', 'bcgl')).toBe(3);
expect(rabinKarp('abcxabcdabxabcdabcdabcy', 'abcdabcy')).toBe(15);
Expand Down
83 changes: 16 additions & 67 deletions src/algorithms/string/rabin-karp/rabinKarp.js
Original file line number Diff line number Diff line change
@@ -1,77 +1,26 @@
/**
 * Prime number used as the base of the polynomial word hash:
 * hashWord() weights the character at index i by PRIME ** i.
 *
 * The bigger the prime number, the bigger the resulting hash values.
 */
const PRIME = 97;

/**
 * Computes the polynomial hash of a word.
 *
 * Every UTF-16 code unit of the word is multiplied by PRIME raised to the
 * power of its index, and the resulting terms are summed from left to right.
 * An empty word hashes to 0.
 *
 * @param {string} word
 * @return {number}
 */
export function hashWord(word) {
  // One weighted term per character position.
  const weightedTerms = Array.from(
    { length: word.length },
    (_, position) => word.charCodeAt(position) * (PRIME ** position),
  );

  // Fold in index order so the floating-point sum matches a plain loop.
  return weightedTerms.reduce((sum, term) => sum + term, 0);
}

/**
 * Derives the hash of a word from the hash of the previous word, where the
 * new word is the previous one shifted by one character to the left.
 *
 * This avoids traversing the whole word again: the contribution of the
 * previous word's first character is subtracted, the remainder is divided
 * by PRIME (lowering every remaining power by one), and the term for the
 * new word's last character is added.
 *
 * @param {number} prevHash
 * @param {string} prevWord
 * @param {string} newWord
 * @return {number}
 */
export function reHashWord(prevHash, prevWord, newWord) {
  const tailIndex = newWord.length - 1;

  // Drop the leading character and shift the remaining terms down one power.
  const shiftedHash = (prevHash - prevWord.charCodeAt(0)) / PRIME;

  // Weighted term for the character that entered at the end of the word.
  const incomingTerm = newWord.charCodeAt(tailIndex) * (PRIME ** tailIndex);

  return shiftedHash + incomingTerm;
}
import RabinFingerprint from '../../../utils/hash/rolling/Rabin_Fingerprint';

/**
* @param {string} text
* @param {string} word
* @return {number}
*/
export function rabinKarp(text, word) {
// Calculate word hash that we will use for comparison with other substring hashes.
const wordHash = hashWord(word);

let prevSegment = null;
let currentSegmentHash = null;

// Go through all substring of the text that may match
for (let charIndex = 0; charIndex <= text.length - word.length; charIndex += 1) {
const currentSegment = text.substring(charIndex, charIndex + word.length);

// Calculate the hash of current substring.
if (currentSegmentHash === null) {
currentSegmentHash = hashWord(currentSegment);
} else {
currentSegmentHash = reHashWord(currentSegmentHash, prevSegment, currentSegment);
}

prevSegment = currentSegment;
export default function rabinKarp(text, word) {
// The prime generation function could depend on the inputs for collision guarantees.
const hasher = new RabinFingerprint(() => 229);
const toCode = character => character.codePointAt(0);
const cmpVal = hasher.init(Array.from(word).map(toCode));

let currHash = hasher.init(Array.from(text.substring(0, word.length)).map(toCode));
if ((currHash === cmpVal) && (word.valueOf() === text.substring(0, word.length).valueOf())) {
return 0;
}

// Compare the hash of current substring and seeking string.
if ((wordHash === currentSegmentHash) && (currentSegment.valueOf() === word.valueOf())) {
return charIndex;
for (let i = 0; i < (text.length - word.length); i += 1) {
currHash = hasher.roll(text.codePointAt(i), text.codePointAt(i + word.length));
if ((currHash === cmpVal)
&& (word.valueOf() === text.substring(i + 1, i + word.length + 1).valueOf())) {
return i + 1;
}
}

Expand Down

0 comments on commit 11367f2

Please sign in to comment.