require 'rubygems'
require 'mechanize'
require 'rss'
require 'iconv'
require 'time' # Time.parse is used for pubDate below

# Builds an RSS 2.0 feed from the Yonsei University notice board.
#
# Steps:
#   1. Fetch the notice list page.
#   2. For every js_mview('<action>','<pkid>') call found in its HTML, submit
#      the list page's 'listFrm' form with that action/pkid to load the article.
#   3. Read title, author, body and date out of the article's table cells.
#   4. Convert the scraped text from CP949 to UTF-8 and write the feed to disk.

# The script calls to_absolute_uri from outside the class, so it is made
# public here.
class WWW::Mechanize
  public :to_absolute_uri
end

list_url  = 'http://www.yonsei.ac.kr/community/notice/noticeList.asp'
feed_link = "http://www.yonsei.ac.kr/contents/etc/ynotice.html"
feed_path = '/home/mithrandir/public_html/yonsei_notice.xml'

agent = WWW::Mechanize.new
list_page = agent.get(list_url)

notices = []

# [^']* instead of .* so that two js_mview(...) calls on the same line of HTML
# are matched separately rather than being swallowed by one greedy capture.
list_page.body.scan(/js_mview\('([^']*)','([^']*)'\)/) do |act, pkid|
  # Take the form from the saved list page: after the first submit the agent's
  # current page is an article, which is not guaranteed to contain 'listFrm'.
  form = list_page.form('listFrm')
  form.action = act
  form.pkid = pkid
  article = form.submit

  author_cell = (article/"td[@class='CBD_MID_LP8'][@colspan=4]").first
  body_html = (article/"td[@class='CBD_MID_LP8'][@colspan=6]").inner_html

  # Rewrite src/href attribute values as absolute URIs so they still resolve
  # when the HTML is displayed outside the original site.
  # NOTE(review): to_absolute_uri presumably resolves against the agent's
  # current page (the article just loaded) -- confirm for the installed
  # Mechanize version.
  absolute_html = body_html.gsub(/(src|href)=(['"])(.*?)\2/) do
    attr_name, target = $1, $3
    "#{attr_name}='#{agent.to_absolute_uri(target)}'"
  end

  notices << {
    :link     => "http://www.yonsei.ac.kr#{act}?pkid=#{pkid}",
    :title    => (article/"td[@class='CBD3_TOPR B']").inner_text,
    :author   => "nobody@yonsei.ac.kr (%s)" % author_cell.inner_html.sub(/ /, ''),
    :contents => absolute_html,
    :pubdate  => (article/"td[@class='CBD_MID_LP8'][@width=230]").inner_text
  }
end

# Iconv.conv returns a String; Iconv.iconv returns an Array of Strings, which
# is only rendered correctly where Array#to_s joins its elements.
to_utf8 = lambda { |text| Iconv.conv('utf-8', 'cp949', text) }

rss = RSS::Rss.new("2.0")
chan = RSS::Rss::Channel.new
chan.title = "Yonsei Univ. Notice"
chan.link = feed_link
chan.description = "Merong"
rss.channel = chan

notices.each do |notice|
  item = RSS::Rss::Channel::Item.new
  item.title = to_utf8.call(notice[:title])
  item.author = to_utf8.call(notice[:author])
  item.link = notice[:link]
  item.description = to_utf8.call(notice[:contents])
  item.pubDate = Time.parse(notice[:pubdate])
  # The article URL doubles as the item's guid (first argument: isPermaLink).
  item.guid = RSS::Rss::Channel::Item::Guid.new(true, notice[:link])
  chan.items << item
end

File.open(feed_path, 'w') { |f| f.write(rss.to_s) }