Skip to content
This repository has been archived by the owner on Sep 18, 2021. It is now read-only.

Commit

Permalink
Merge branch 'master' of github.com:twitter/twitter-text-rb into punc…
Browse files Browse the repository at this point in the history
…t_before_url

Conflicts:
	lib/autolink.rb
	lib/regex.rb
  • Loading branch information
keita committed Feb 8, 2012
2 parents 2f30f1d + dc7509f commit 34fa8a8
Show file tree
Hide file tree
Showing 7 changed files with 35 additions and 21 deletions.
15 changes: 11 additions & 4 deletions README.rdoc
Original file line number Diff line number Diff line change
Expand Up @@ -55,9 +55,16 @@ numbers but cannot be solely numbers and cannot contain punctuation.

=== URLs

Auto-linking and extraction of URLs differs from the Rails default so that it
will work correctly in Tweets written in languages that do not include spaces
between words.
Asian languages like Chinese, Japanese or Korean may not use a delimiter such as
a space to separate normal text from URLs, making it difficult to identify where
the URL ends and the text starts.

For this reason, twitter-text currently does not support extraction or auto-linking
of URLs immediately followed by non-Latin characters.

Example: "http://twitter.com/は素晴らしい".
The normal text is "は素晴らしい" and is not part of the URL even though
it isn't space-separated.

=== International

Expand Down Expand Up @@ -112,4 +119,4 @@ Thanks to everybody who has filed issues, provided feedback or contributed patch
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
limitations under the License.
10 changes: 5 additions & 5 deletions lib/autolink.rb
Original file line number Diff line number Diff line change
Expand Up @@ -45,10 +45,10 @@ def auto_link_entities(text, entities, options)
options[:url_class] ||= DEFAULT_URL_CLASS
options[:list_class] ||= DEFAULT_LIST_CLASS
options[:username_class] ||= DEFAULT_USERNAME_CLASS
options[:username_url_base] ||= "http://twitter.com/"
options[:list_url_base] ||= "http://twitter.com/"
options[:username_url_base] ||= "https://twitter.com/"
options[:list_url_base] ||= "https://twitter.com/"
options[:hashtag_class] ||= DEFAULT_HASHTAG_CLASS
options[:hashtag_url_base] ||= "http://twitter.com/#!/search?q=%23"
options[:hashtag_url_base] ||= "https://twitter.com/#!/search?q=%23"
options[:target] ||= DEFAULT_TARGET
extra_html = HTML_ATTR_NO_FOLLOW unless options[:suppress_no_follow]

Expand Down Expand Up @@ -187,10 +187,10 @@ def auto_link_urls_custom(text, href_options = {})
BOOLEAN_ATTRIBUTES = Set.new([:disabled, :readonly, :multiple, :checked]).freeze

def html_attrs_for_options(options)
html_attrs options.reject{|k, v| OPTIONS_NOT_ATTRIBUTES.include?(k)}
autolink_html_attrs options.reject{|k, v| OPTIONS_NOT_ATTRIBUTES.include?(k)}
end

def html_attrs(options)
def autolink_html_attrs(options)
options.inject("") do |attrs, (key, value)|
if BOOLEAN_ATTRIBUTES.include?(key)
value = value ? key : nil
Expand Down
2 changes: 1 addition & 1 deletion lib/regex.rb
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ def self.regex_range(from, to = nil) # :nodoc:
# Used in Extractor and Rewriter for final filtering
REGEXEN[:end_hashtag_match] = /\A(?:[##]|:\/\/)/o

REGEXEN[:valid_mention_preceding_chars] = /(?:[^a-zA-Z0-9_]|^|RT:?)/o
REGEXEN[:valid_mention_preceding_chars] = /(?:[^a-zA-Z0-9_!#\$%&*@@]|^|RT:?)/o
REGEXEN[:at_signs] = /[@@]/
REGEXEN[:valid_mention_or_list] = /
(#{REGEXEN[:valid_mention_preceding_chars]}) # $1: Preceeding character
Expand Down
14 changes: 7 additions & 7 deletions spec/autolinking_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -298,7 +298,7 @@ def original_text; "#{[0xFF03].pack('U')}twj_dev"; end
it "should be linked" do
link = Nokogiri::HTML(@autolinked_text).search('a')
(link.inner_text.respond_to?(:force_encoding) ? link.inner_text.force_encoding("utf-8") : link.inner_text).should == "#{[0xFF03].pack('U')}twj_dev"
link.first['href'].should == 'http://twitter.com/#!/search?q=%23twj_dev'
link.first['href'].should == 'https://twitter.com/#!/search?q=%23twj_dev'
end
end

Expand All @@ -309,7 +309,7 @@ def original_text
end

it "should be linked" do
@autolinked_text.should == "<a href=\"http://twitter.com/#!/search?q=%23éhashtag\" title=\"#éhashtag\" class=\"tweet-url hashtag\" rel=\"nofollow\">#éhashtag</a>"
@autolinked_text.should == "<a href=\"https://twitter.com/#!/search?q=%23éhashtag\" title=\"#éhashtag\" class=\"tweet-url hashtag\" rel=\"nofollow\">#éhashtag</a>"
end
end

Expand Down Expand Up @@ -530,13 +530,13 @@ def original_text; "Single char file ext http://www.bestbuy.com/site/Currie+Tech
end

it "should allow url/hashtag overlap" do
auto_linked = @linker.auto_link("http://twitter.com/#search")
auto_linked.should have_autolinked_url('http://twitter.com/#search')
auto_linked = @linker.auto_link("https://twitter.com/#search")
auto_linked.should have_autolinked_url('https://twitter.com/#search')
end

it "should not add invalid option in HTML tags" do
auto_linked = @linker.auto_link("http://twitter.com/ is a URL, not a hashtag", :hashtag_class => 'hashtag_classname')
auto_linked.should have_autolinked_url('http://twitter.com/')
auto_linked = @linker.auto_link("https://twitter.com/ is a URL, not a hashtag", :hashtag_class => 'hashtag_classname')
auto_linked.should have_autolinked_url('https://twitter.com/')
auto_linked.should_not include('hashtag_class')
auto_linked.should_not include('hashtag_classname')
end
Expand Down Expand Up @@ -626,7 +626,7 @@ def original_text; "Single char file ext http://www.bestbuy.com/site/Currie+Tech
@linker.html_escape("&<>\"").should == "&amp;&lt;&gt;&quot;"
@linker.html_escape("<div>").should == "&lt;div&gt;"
@linker.html_escape("a&b").should == "a&amp;b"
@linker.html_escape("<a href=\"http://twitter.com\" target=\"_blank\">twitter & friends</a>").should == "&lt;a href=&quot;http://twitter.com&quot; target=&quot;_blank&quot;&gt;twitter &amp; friends&lt;/a&gt;"
@linker.html_escape("<a href=\"https://twitter.com\" target=\"_blank\">twitter & friends</a>").should == "&lt;a href=&quot;https://twitter.com&quot; target=&quot;_blank&quot;&gt;twitter &amp; friends&lt;/a&gt;"
@linker.html_escape("&amp;").should == "&amp;amp;"
@linker.html_escape(nil).should == nil
end
Expand Down
7 changes: 7 additions & 0 deletions spec/extractor_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,13 @@ class TestExtractor
it "should be linked in Japanese text" do
@extractor.extract_mentioned_screen_names("の@aliceに到着を待っている").should == ["alice"]
end

it "should ignore mentions preceded by !, @, #, $, %, & or *" do
invalid_chars = ['!', '@', '#', '$', '%', '&', '*']
invalid_chars.each do |c|
@extractor.extract_mentioned_screen_names("f#{c}@kn").should == []
end
end
end

it "should accept a block arugment and call it in order" do
Expand Down
6 changes: 3 additions & 3 deletions spec/spec_helper.rb
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@
@link = Nokogiri::HTML(text).search("a.username")
@link &&
@link.inner_text == expected &&
"http://twitter.com/#{screen_name}".downcase.should == @link.first['href']
"https://twitter.com/#{screen_name}".downcase.should == @link.first['href']
end

failure_message_for_should do |text|
Expand All @@ -85,7 +85,7 @@
@link = Nokogiri::HTML(text).search("a.list-slug")
@link &&
@link.inner_text == expected &&
"http://twitter.com/#{list_path}".downcase.should == @link.first['href']
"https://twitter.com/#{list_path}".downcase.should == @link.first['href']
end

failure_message_for_should do |text|
Expand All @@ -107,7 +107,7 @@

RSpec::Matchers.define :have_autolinked_hashtag do |hashtag|
match do |text|
@link = Nokogiri::HTML(text).search("a[@href='http://twitter.com/#!/search?q=#{hashtag.sub(/^#/, '%23')}']")
@link = Nokogiri::HTML(text).search("a[@href='https://twitter.com/#!/search?q=#{hashtag.sub(/^#/, '%23')}']")
@link &&
@link.inner_text &&
@link.inner_text == hashtag
Expand Down
2 changes: 1 addition & 1 deletion twitter-text.gemspec
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

Gem::Specification.new do |s|
s.name = "twitter-text"
s.version = "1.4.15"
s.version = "1.4.16"
s.authors = ["Matt Sanford", "Patrick Ewing", "Ben Cherry", "Britt Selvitelle",
"Raffi Krikorian", "J.P. Cummins", "Yoshimasa Niwa", "Keita Fujii"]
s.email = ["matt@twitter.com", "patrick.henry.ewing@gmail.com", "bcherry@gmail.com", "bs@brittspace.com",
Expand Down

0 comments on commit 34fa8a8

Please sign in to comment.