Skip to content

Commit

Permalink
Merge branch 'mongo' into next
Browse files Browse the repository at this point in the history
  • Loading branch information
nehhen committed Aug 29, 2010
2 parents 7eda7c6 + 27491c0 commit 82aa853
Show file tree
Hide file tree
Showing 6 changed files with 195 additions and 20 deletions.
33 changes: 33 additions & 0 deletions lib/anemone/page.rb
Original file line number Diff line number Diff line change
Expand Up @@ -165,5 +165,38 @@ def marshal_load(ary)
@url, @headers, @data, @body, @links, @code, @visited, @depth, @referer, @redirect_to, @response_time, @fetched = ary
end

# Converts this page into a Hash with string keys.
#
# URL-like values (url, links, referer, redirect_to) are stored as strings,
# and @headers / @data are stored as Marshal dumps. Every other field is
# copied through unchanged.
#
# @return [Hash] the string-keyed representation of this page
def to_hash
  serialized = {}
  serialized['url'] = @url.to_s
  # Headers and data are stored as Marshal byte strings.
  serialized['headers'] = Marshal.dump(@headers)
  serialized['data'] = Marshal.dump(@data)
  serialized['body'] = @body
  # Uses the #links reader rather than @links directly.
  serialized['links'] = links.map { |link| link.to_s }
  serialized['code'] = @code
  serialized['visited'] = @visited
  serialized['depth'] = @depth
  # nil.to_s is '', so a missing referer / redirect is written as an empty string.
  serialized['referer'] = @referer.to_s
  serialized['redirect_to'] = @redirect_to.to_s
  serialized['response_time'] = @response_time
  serialized['fetched'] = @fetched
  serialized
end

# Rebuilds a page from a string-keyed Hash in the format written by #to_hash.
#
# Keys read: 'url', 'headers', 'data', 'body', 'links', 'code', 'visited',
# 'depth', 'referer', 'redirect_to', 'response_time', 'fetched'.
#
# @param hash [Hash] serialized page; 'headers' and 'data' must be Marshal
#   dumps, 'links' an array of URL strings
# @return [Page] a new instance with its instance variables restored
def self.from_hash(hash)
  page = self.new(URI(hash['url']))

  # #to_hash writes a missing redirect as '' (nil.to_s). Restore that as nil
  # rather than as an empty, truthy URI object; a nil value is tolerated too.
  redirect = hash['redirect_to']
  redirect = (redirect.nil? || redirect.to_s.empty?) ? nil : URI(redirect)

  # NOTE(review): Marshal.load can instantiate arbitrary objects; this is only
  # safe while the stored 'headers' / 'data' blobs are trusted.
  {
    '@headers' => Marshal.load(hash['headers']),
    '@data' => Marshal.load(hash['data']),
    '@body' => hash['body'],
    '@links' => hash['links'].map { |link| URI(link) },
    '@code' => hash['code'].to_i,
    '@visited' => hash['visited'],
    '@depth' => hash['depth'].to_i,
    # The referer is restored as stored (a string), not converted to a URI.
    '@referer' => hash['referer'],
    '@redirect_to' => redirect,
    '@response_time' => hash['response_time'].to_i,
    '@fetched' => hash['fetched']
  }.each do |var, value|
    page.instance_variable_set(var, value)
  end
  page
end
end
end
9 changes: 8 additions & 1 deletion lib/anemone/storage.rb
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,17 @@ def self.PStore(*args)
self::PStore.new(*args)
end

def self.TokyoCabinet(file)
# Builds a TokyoCabinet storage engine.
#
# @param file [String] file name handed to the engine's constructor
#   (defaults to 'anemone.tch')
# @return [Object] a new instance of the TokyoCabinet storage class
def self.TokyoCabinet(file = 'anemone.tch')
  # Required at call time rather than when this file is loaded.
  require 'anemone/storage/tokyo_cabinet'
  engine_class = self::TokyoCabinet
  engine_class.new(file)
end

# Builds a MongoDB storage engine.
#
# @param mongo_db [Mongo::DB, nil] database to use; when omitted, a new
#   default connection is opened and its 'anemone' database is used
# @param collection_name [String] collection name passed to the engine
# @return [Object] a new instance of the MongoDB storage class
# @raise [RuntimeError] if the resolved database is not a Mongo::DB
def self.MongoDB(mongo_db = nil, collection_name = 'pages')
  # Required at call time rather than when this file is loaded.
  require 'anemone/storage/mongodb'
  database = mongo_db || Mongo::Connection.new.db('anemone')
  unless database.is_a?(Mongo::DB)
    raise "First argument must be an instance of Mongo::DB"
  end
  self::MongoDB.new(database, collection_name)
end

end
end
80 changes: 80 additions & 0 deletions lib/anemone/storage/mongodb.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
# Load the mongo driver; everything below in this file depends on it.
begin
require 'mongo'
rescue LoadError
# NOTE(review): the message goes to stdout and a bare `exit` terminates the
# whole process with a success status, so a missing gem ends the host program
# (or a spec run) without signalling failure. Consider stderr and a non-zero
# status, or re-raising, if that is not intended.
puts "You need the mongo gem to use Anemone::Storage::MongoDB"
exit
end

module Anemone
  module Storage
    # Page store backed by a single MongoDB collection.
    #
    # Each page is kept as one document (the result of Page#to_hash) and is
    # looked up by its 'url' field.
    class MongoDB

      # @param mongo_db [Object] database handle; must respond to #[] and #connection
      # @param collection_name [String] name of the collection holding the pages
      def initialize(mongo_db, collection_name)
        @db = mongo_db
        @collection = @db[collection_name]
        # Start from an empty collection: existing documents are removed.
        @collection.remove
        @collection.create_index 'url'
      end

      # Returns the page stored for +url+, or nil when there is none.
      def [](url)
        document = @collection.find_one('url' => url.to_s)
        document ? load_page(document) : nil
      end

      # Stores +page+, inserting or replacing the document for its URL.
      # The document is matched on page.url; the +url+ argument is not consulted.
      def []=(url, page)
        selector = {'url' => page.url.to_s}
        @collection.update(selector, page.to_hash, :upsert => true)
      end

      # Removes the document for +url+ and returns the page it held (or nil).
      def delete(url)
        # The page is loaded before the document is removed.
        self[url].tap { @collection.remove('url' => url.to_s) }
      end

      # Yields (url string, page) for every stored document.
      def each
        @collection.find do |cursor|
          cursor.each do |document|
            loaded = load_page(document)
            yield(loaded.url.to_s, loaded)
          end
        end
      end

      # Stores every pair from +hash+ and returns self.
      def merge!(hash)
        hash.each do |url, page|
          self[url] = page
        end
        self
      end

      # Number of documents in the collection.
      def size
        @collection.count
      end

      # URLs of all stored pages, as strings. Walks every document via #each.
      def keys
        [].tap do |urls|
          each { |url, _page| urls << url.to_s }
        end
      end

      # True when a document exists for +url+, false otherwise.
      def has_key?(url)
        @collection.find_one('url' => url.to_s) ? true : false
      end

      # Closes the connection that the database handle belongs to.
      def close
        @db.connection.close
      end

      private

      # Turns a stored document back into a Page.
      def load_page(hash)
        Page.from_hash(hash)
      end

    end
  end
end

17 changes: 16 additions & 1 deletion spec/page_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ module Anemone
before(:each) do
FakeWeb.clean_registry
@http = Anemone::HTTP.new
@page = @http.fetch_page(FakePage.new('home').url)
@page = @http.fetch_page(FakePage.new('home', :links => '1').url)
end

it "should indicate whether it successfully fetched via HTTP" do
Expand Down Expand Up @@ -73,5 +73,20 @@ module Anemone
@page.cookies.should == []
end

# The serialized hash should expose URL-like fields as plain strings.
it "should have a to_hash method that converts the page to a hash" do
  serialized = @page.to_hash
  serialized['url'].should == @page.url.to_s
  serialized['referer'].should == @page.referer.to_s
  serialized['links'].should == @page.links.map { |link| link.to_s }
end

# Round-trips a copy of the fetched page through to_hash / from_hash and
# checks that links and depth survive.
it "should have a from_hash method to convert from a hash to a Page" do
  original = @page.dup
  original.depth = 1
  restored = Page.from_hash(original.to_hash)
  restored.links.should == original.links
  restored.depth.should == original.depth
end

end
end
16 changes: 14 additions & 2 deletions spec/page_store_spec.rb
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
require File.dirname(__FILE__) + '/spec_helper'
%w[pstore tokyo_cabinet].each { |file| require "anemone/storage/#{file}.rb" }
%w[pstore tokyo_cabinet mongodb].each { |file| require "anemone/storage/#{file}.rb" }

module Anemone
describe PageStore do
Expand All @@ -9,7 +9,7 @@ module Anemone
end

shared_examples_for "page storage" do
it "should be able to computer single-source shortest paths in-place" do
it "should be able to compute single-source shortest paths in-place" do
pages = []
pages << FakePage.new('0', :links => ['1', '3'])
pages << FakePage.new('1', :redirect => '2')
Expand Down Expand Up @@ -124,5 +124,17 @@ module Anemone
end
end

# Runs the shared "page storage" examples against the MongoDB engine.
describe Storage::MongoDB do
  it_should_behave_like "page storage"

  before(:each) do
    # A store built with the factory's default arguments for every example.
    @store = Storage.MongoDB
    @opts = {:storage => @store}
  end

  after(:each) do
    @store.close
  end
end

end
end
60 changes: 44 additions & 16 deletions spec/storage_spec.rb
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
require File.dirname(__FILE__) + '/spec_helper'
%w[pstore tokyo_cabinet].each { |file| require "anemone/storage/#{file}.rb" }
%w[pstore tokyo_cabinet mongodb].each { |file| require "anemone/storage/#{file}.rb" }

module Anemone
describe Storage do
Expand All @@ -23,59 +23,76 @@ module Anemone
store.close
end

# The Storage module should expose a MongoDB factory that returns the engine.
it "should have a class method to produce a MongoDB" do
  Anemone::Storage.should respond_to(:MongoDB)
  mongo_store = Anemone::Storage.MongoDB
  mongo_store.should be_an_instance_of(Anemone::Storage::MongoDB)
  mongo_store.close
end

module Storage
shared_examples_for "storage engine" do

before(:each) do
@url = SPEC_DOMAIN
@page = Page.new(URI(@url))
end

it "should implement [] and []=" do
@store.should respond_to(:[])
@store.should respond_to(:[]=)

@store['index'] = 'test'
@store['index'].should == 'test'
@store[@url] = @page
@store[@url].url.should == URI(@url)
end

it "should implement has_key?" do
@store.should respond_to(:has_key?)

@store['index'] = 'test'
@store.has_key?('index').should == true
@store[@url] = @page
@store.has_key?(@url).should == true

@store.has_key?('missing').should == false
end

it "should implement delete" do
@store.should respond_to(:delete)

@store['index'] = 'test'
@store.delete('index').should == 'test'
@store.has_key?('index').should == false
@store[@url] = @page
@store.delete(@url).url.should == @page.url
@store.has_key?(@url).should == false
end

it "should implement keys" do
@store.should respond_to(:keys)

keys = ['a', 'b', 'c']
keys.each { |key| @store[key] = key }
urls = [SPEC_DOMAIN, SPEC_DOMAIN + 'test', SPEC_DOMAIN + 'another']
pages = urls.map { |url| Page.new(URI(url)) }
urls.zip(pages).each { |arr| @store[arr[0]] = arr[1] }

@store.keys.should == keys
(@store.keys - urls).should == []
end

it "should implement each" do
@store.should respond_to(:each)

keys = ['a', 'b', 'c']
keys.each { |key| @store[key] = key }
urls = [SPEC_DOMAIN, SPEC_DOMAIN + 'test', SPEC_DOMAIN + 'another']
pages = urls.map { |url| Page.new(URI(url)) }
urls.zip(pages).each { |arr| @store[arr[0]] = arr[1] }

result = {}
@store.each { |k, v| result[k] = v }
result.values.should == keys
(result.keys - urls).should == []
(result.values.map { |page| page.url.to_s } - urls).should == []
end

it "should implement merge!, and return self" do
@store.should respond_to(:merge!)

hash = {'a' => 'a', 'b' => 'b', 'c' => 'c'}
hash = {SPEC_DOMAIN => Page.new(URI(SPEC_DOMAIN)),
SPEC_DOMAIN + 'test' => Page.new(URI(SPEC_DOMAIN + 'test'))}
merged = @store.merge! hash
hash.each { |key, value| @store[key].should == value }
hash.each { |key, value| @store[key].url.to_s.should == key }

merged.should === @store
end
Expand Down Expand Up @@ -115,7 +132,18 @@ module Storage
it "should raise an error if supplied with a file extension other than .tch" do
lambda { Anemone::Storage.TokyoCabinet('test.tmp') }.should raise_error(RuntimeError)
end
end

# Runs the shared "storage engine" examples against the MongoDB engine.
describe Storage::MongoDB do
  it_should_behave_like "storage engine"

  before(:each) do
    # A store built with the factory's default arguments for every example.
    @store = Storage.MongoDB
    @opts = {:storage => @store}
  end

  after(:each) do
    @store.close
  end
end

end
Expand Down

0 comments on commit 82aa853

Please sign in to comment.