TwitterJSON
Twitter の HTML を解析して JSON 形式にする。
発言のアンカータグを消す修正をいれた。
require 'rubygems'
require 'mechanize'
require 'json/lexer'

# Logs in to Twitter with a Mechanize agent, scrapes the "timeline" element
# of the home page and re-emits its entries as a JSON string.
#
# Usage:
#   puts TwitterJSON.new(user_name, password).json
class TwitterJSON
  # Page that carries the login form.
  # NOTE(review): this is fetched over plain http; confirm that the form
  # itself posts the credentials over TLS.
  LOGIN_URL = 'http://twitter.com'

  # Page that carries the element with id "timeline".
  HOME_URL = 'http://twitter.com/home'

  # How many times the home page is fetched before giving up
  # (the original loop ran 0.upto(5), i.e. six attempts).
  MAX_ATTEMPTS = 6

  # @param user_name [String] value submitted as "username_or_email"
  # @param password  [String] value submitted as "password"
  def initialize(user_name, password)
    @user_name = user_name
    @password = password
    @agent = WWW::Mechanize.new
  end

  # Logs in and returns the home timeline serialized as JSON.
  #
  # The result is an array of objects with the keys "user" (an object with
  # "name", "screen_name" and "profile_image_url"), "text", "created_at"
  # and "id".
  #
  # @return [String] JSON text
  # @raise [RuntimeError] when the timeline element cannot be found
  def json
    login
    request
  end

  private

  # Fills in and submits the second form (index 1) of the login page.
  def login
    form = @agent.get(LOGIN_URL).forms[1]
    form["username_or_email"] = @user_name
    form["password"] = @password
    @agent.submit(form)
  end

  # Converts every element child of the timeline into a hash and serializes
  # the resulting list.
  def request
    timeline = fetch_timeline
    # Previously a missing timeline surfaced as NoMethodError on nil;
    # fail with a message that names the actual problem instead.
    unless timeline
      raise "timeline element not found at #{HOME_URL} after #{MAX_ATTEMPTS} attempts"
    end

    statuses = []
    timeline.each_child do |row|
      next unless row.elem? # skip non-element nodes between the rows

      statuses << parse_status(row)
    end
    statuses.to_json
  end

  # Fetches the home page up to MAX_ATTEMPTS times and returns the element
  # with id "timeline", or nil when no attempt produced one.
  def fetch_timeline
    MAX_ATTEMPTS.times do
      timeline = @agent.get(HOME_URL).root.get_element_by_id("timeline")
      return timeline if timeline
    end
    nil
  end

  # Builds the hash for a single timeline row.
  def parse_status(row)
    # The screen name is whatever follows the last "/" of the first link.
    screen_name = Regexp.last_match(1) if row.find_element("a")[:href] =~ /.*\/(.*)$/
    avatar = row.find_element("img")
    user = {
      "name" => avatar[:alt],
      "screen_name" => screen_name,
      "profile_image_url" => avatar[:src]
    }

    {
      "user" => user,
      "text" => strip_anchors(row.find_element("span").innerHTML.strip),
      "created_at" => row.at("abbr.published")[:title],
      # The row id is split on "_" and the second piece is used.
      "id" => row[:id].split("_")[1]
    }
  end

  # Removes anchor tags from the status HTML: "@<a ...>name</a>" becomes
  # "@name", and any other link is replaced by its href.
  def strip_anchors(html)
    # The replacement strings use "\\1": inside a gsub replacement string the
    # captured group must be referenced as \1, not $1.
    without_replies = html.gsub(/@<a .*?>(.*?)<\/a>/, "@\\1")
    without_replies.gsub(/<a href="(.*?)".*?>.*?<\/a>/, "\\1")
  end
end
gsub の置換文字列の中でマッチした部分を参照するときは、$1 ではなく \\1 と書く。$1 は gsub が呼ばれる前に評価されてしまうので、置換文字列には使えない(ブロック形式の gsub なら $1 が使える)。