TwitterJSON
ソースをちょろっと見直してみた。
require 'rubygems'
require 'mechanize'
require 'json/lexer' # presumably supplies the #to_json used in #parse -- TODO confirm

# Scrapes the twitter.com home timeline through the web UI (logging in with
# a Mechanize agent) and returns it as a JSON string.
#
# Usage:
#   TwitterJSON.new(user_name, password).json
class TwitterJSON
  # Raised when the page returned after submitting the login form does not
  # have a <body class="account">.
  class LoginError < StandardError; end

  # Raised when no element with id "timeline" could be fetched within the
  # allowed number of requests.
  class OverLimitError < StandardError; end

  # Upper bound of the request counter used when fetching the timeline.
  # The counter runs from 0 to RETRY_MAX inclusive, i.e. one initial request
  # plus up to RETRY_MAX retries.
  RETRY_MAX = 5

  # user_name:: value for the login form's "username_or_email" field
  # password::  value for the login form's "password" field
  # option::    Hash; :max_history (default 1) is converted with #to_i and
  #             assigned to the agent's max_history.
  #
  # No request is made here; login happens lazily on the first #json call.
  def initialize(user_name, password, option = {})
    # Read the default without writing it back, so the caller's hash is
    # left untouched.
    max_history = option[:max_history] || 1

    @user_name = user_name
    @password = password
    @logined = false
    @agent = WWW::Mechanize.new
    @agent.max_history = max_history.to_i
  end

  # Returns the home timeline as a JSON string (see #parse for the layout).
  # Logs in first if this instance has not logged in successfully yet.
  #
  # Raises LoginError or OverLimitError (see the class definitions).
  def json
    login unless @logined
    parse
  end

  #
  # private methods
  #
  private

  # Submits the login form found on http://twitter.com and marks this
  # instance as logged in when the resulting page has <body class="account">.
  # Raises LoginError otherwise.
  def login
    start = Time.now
    puts "login ... start" if $DEBUG

    page = @agent.get('http://twitter.com')
    # The second form on the front page is used as the sign-in form.
    form = page.forms[1]
    form["username_or_email"] = @user_name
    form["password"] = @password
    page = @agent.submit(form)

    body = page.root.get_elements_by_tag_name("body")[0]
    raise LoginError unless body && body[:class] == "account"

    puts "login ... end (#{Time.now - start})" if $DEBUG
    @logined = true
  end

  # Fetches the timeline and converts every element child of it into a hash
  # of the form
  #   {"user" => {"name", "screen_name", "profile_image_url"},
  #    "text", "created_at", "id"}
  # Returns the list of those hashes serialized with #to_json.
  #
  # Raises OverLimitError when the timeline element could not be fetched.
  def parse
    start = Time.now
    puts "parse ... start" if $DEBUG

    table = fetch_timeline
    raise OverLimitError unless table

    list = []
    table.each_child do |tr|
      next unless tr.elem? # skip non-element children
      list << status_from(tr)
    end

    puts "parse ... end (#{Time.now - start})" if $DEBUG
    list.to_json
  end

  # Requests http://twitter.com/home until the page contains an element with
  # id "timeline", giving up after the counter reaches RETRY_MAX.
  # Returns that element, or nil if it never showed up.
  def fetch_timeline
    0.upto(RETRY_MAX) do |i|
      puts "request try #{i}" if $DEBUG
      page = @agent.get('http://twitter.com/home')
      table = page.root.get_element_by_id("timeline")
      return table if table
    end
    nil
  end

  # Builds the status hash for a single timeline row element.
  def status_from(tr)
    # The screen name is whatever follows the last "/" of the first link.
    screen_name = $1 if tr.find_element("a")[:href] =~ /.*\/(.*)$/

    image = tr.find_element("img")
    name = image[:alt]
    img = image[:src]

    text = tr.find_element("span").innerHTML.strip
    # "@<a ...>user</a>" becomes "@user" ...
    text = text.gsub(/@<a .*?>(.*?)<\/a>/ , "@\\1")
    # ... and every remaining link is replaced by its href.
    text = text.gsub(/<a href="(.*?)".*?>.*?<\/a>/ , "\\1")

    date = tr.at("abbr.published")[:title]
    # The row id is split on "_" and the second piece is used as status id.
    id = tr[:id].split("_")[1]

    user = {"name" => name, "screen_name" => screen_name, "profile_image_url" => img}
    {"user" => user, "text" => text, "created_at" => date, "id" => id}
  end
end