网站首页  汉语字词  英语词汇  考试资料  写作素材  旧版资料

请输入您要查询的考试资料:

 

标签的border 样式在浏览器中显示不出来的解决方法
  • 剖析标注HTML元素时class比id所具有的优势
  • Table显示你要显示的边框代码
  • 点击按钮文字变成input框,点击保存变成文字的实现代码
  • 标题 python实现的一个火车票转让信息采集器
    内容
        这篇文章主要介绍了python实现的一个火车票转让信息采集器,采集信息来源是58同城或者赶集网,需要的朋友可以参考下。
        #coding: utf-8
        '''
        春运查询火车票转让信息
        Author: piglei2007@gmail.com
        Date: 2011.01.25
        '''
        import re
        import os
        import time
        import urlparse
        import datetime
        import traceback
        import urllib2
        import socket
        # Give every socket created from here on a 20-second default timeout.
        socket.setdefaulttimeout(20)
        # Matches any run of whitespace; parse_content() uses it to strip
        # whitespace out of listing text.
        BLANK_RE = re.compile(r"\s+")
        # Build an opener with cookie handling and browser-like request headers,
        # then install it globally so plain urllib2.urlopen() calls go through it.
        opener = urllib2.build_opener(urllib2.HTTPCookieProcessor())
        opener.addheaders = [
          ("User-agent", "Mozilla/5.0 (X11; U; FreeBSD i386; en-US; rv:1.9.1) Gecko/20090704 Firefox/3.5"),
          ("Accept", "*/*"),
        ]
        urllib2.install_opener(opener)
        from BeautifulSoup import BeautifulSoup
        # Listing-page URL templates keyed by source name. main() fills in the
        # %(train)s and %(date)s placeholders.
        SOURCE = {
          "58": "http://bj.58.com/huochepiao/?Num=%(train)s&StartTime=%(date)s00",
          "ganji": "http://bj.ganji.com/piao/cc_%(train)s/%(date)s/",
        }
        # File that stores the listing URLs already seen, one per line
        # (read by parse_record(), rewritten by flush_record()).
        RECORD_FILE = "/tmp/ticket_records.txt"
        def parse_record():
          try:
            return set([x.strip() for x in open(RECORD_FILE, "r").readlines()])
          except IOError:
            open(RECORD_FILE, "w")
            return set()
        def flush_record(records):
          open(RECORD_FILE, "w").write("\n".join(records))
        def main(config):
          """
          开始抓取
          """
          existed = parse_record()
          to_email = []
          for train in config["trains"]:
            for date in config["dates"]:
              for type, _url in SOURCE.items():
                url = _url % dict(train=train, date=date)
                content = urllib2.urlopen(url).read()
                soup = BeautifulSoup(content)
                result = parse_content(type, soup, train)
                for url, text in result:
                  url = urlparse.urljoin(_url, url)
                  # 只要卧铺!
                  if url not in existed and u"卧" in text:
                    to_email.append([text, url])
                  existed.add(url)
          if to_email:
            content = "".join(
              [x for x in [" | ".join(y) for y in to_email]]
            ).encode("utf-8")
            simple_mail(config["people"], content)
          flush_record(existed)
        def parse_content(type, soup, train):
          """
          获得车次信息
          """
          result = []
          if type == "58":
            info_table = soup.find("table", id="infolist")
            if info_table:
              for x in info_table.findAll("tr", text=re.compile(ur"%s(?!时刻表)" % train, re.I)):
                a = x.parent
                _text = BLANK_RE.sub("", a.text)
                result.append([a["href"], _text])
          if type == "ganji":
            for x in soup.findAll("dl", {"class": "list_piao"}):
              a = x.dt.a
              result.append([a["href"], a.text])
          return result
        # SMTP settings used by simple_mail(). The user name and password look
        # like placeholders and need to be replaced with real account values.
        EMAIL_HOST = 'smtp.sohu.com'
        # Login name; simple_mail() also uses it as the sender address.
        EMAIL_HOST_USER = 'yourname@sohu.com'
        EMAIL_HOST_PASSWORD = 'yourpassword'
        EMAIL_PORT = 25
        def simple_mail(to, content):
          """
          发送邮件
          """
          import smtplib
          from email.mime.text import MIMEText
          msgRoot = MIMEText(content, 'html', 'UTF-8')
          msgRoot['Subject'] = "[%s]有票来啦!!!!" % datetime.datetime.today().isoformat(" ")
          msgRoot['From'] = EMAIL_HOST_USER
          msgRoot['To'] = ", ".join(to)
          s = smtplib.SMTP(EMAIL_HOST, EMAIL_PORT)
          s.login(EMAIL_HOST_USER, EMAIL_HOST_PASSWORD)
          s.sendmail(EMAIL_HOST_USER, to, msgRoot.as_string())
          s.close()
        def switch_time_zone():
          """
          切换时区
          """
          os.environ["TZ"] = "Asia/Shanghai"
          time.tzset()
        switch_time_zone()
        if __name__ == '__main__':
          config = {
            "trains": ("k471",),
            "dates": ("20110129",),
            "people": (
              "youremail@sohu.com",
            )
          }
          try:
            main(config)
            print "%s: ok" % datetime.datetime.today()
          except Exception, e:
            print traceback.format_exc()然后放入cron,你懂的。
    随便看

     

    在线学习网考试资料包含高考、自考、专升本考试、人事考试、公务员考试、大学生村官考试、特岗教师招聘考试、事业单位招聘考试、企业人才招聘、银行招聘、教师招聘、农村信用社招聘、各类资格证书考试等各类考试资料。

     

    Copyright © 2002-2024 cuapp.net All Rights Reserved
    更新时间:2026/5/4 20:46:57