scRUBYt! で百度中国語版
scRUBYt! の板で百度中国語版の記事があったのでやってみた。
#!/usr/bin/ruby -Ku
# Scrapes a Baidu search-result entry with scRUBYt!, converting each fetched
# page from GB2312 to UTF-8 before Mechanize parses it.
#
# This source must be written in UTF-8.
$KCODE = 'u'

require 'rubygems'
require 'iconv'

# Patching Mechanize...
require 'mechanize'

# Page subclass that runs the response body through iconv before handing it
# to the regular Mechanize page constructor.
class IConvParser < WWW::Mechanize::Page
  # Takes the same four arguments as the parent constructor; only +body+ is
  # touched (converted GB2312 -> UTF-8), the rest are forwarded unchanged.
  def initialize(uri = nil, response = nil, body = nil, code = nil)
    new_body = Iconv.conv("UTF-8//IGNORE", "GB2312//IGNORE", body)
    super(uri, response, new_body, code)
  end
end

# Make IConvParser the parser for HTML responses so that iconv does not have
# to be applied to every page by hand.
class WWW::Mechanize::PluggableParser
  def initialize
    # NOTE(review): this replaces the whole parser table with a single HTML
    # entry; every other content type falls through to @default. Confirm
    # that is intended.
    @parsers = { CONTENT_TYPES[:html] => IConvParser }
    # Must be fully qualified: because this class is reopened through the
    # compact path `WWW::Mechanize::PluggableParser`, WWW::Mechanize is not
    # part of the lexical scope and a bare `File` would resolve to Ruby's
    # core ::File instead of Mechanize's fallback class.
    @default = WWW::Mechanize::File
  end
end

# Scrubyt::FetchAction initializes its @@agent as PluggableParser.new
# at class context, so 'scrubyt' is required manually AFTER the patch
# above has been installed.
require 'scrubyt'

# With the patch in place the extractor definition itself is short.
baidu_data = Scrubyt::Extractor.define do
  fetch "http://www.baidu.com/"
  fill_textfield 'wd', "ruby"
  submit

  result "Ruby_百度百科"
end

puts baidu_data.to_xml