# Data collection for send-out / return repair work orders.
import sys
import time
from urllib.error import HTTPError
from urllib.error import URLError

from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.common.exceptions import StaleElementReferenceException
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.remote.webelement import WebElement

# Constants
SITE_LINK = 'http://10.190.48.74:8601/'
IMAGE_LINK = 'images/repairMissionQuery.png'
BUTTON_TEXT = '统计查询'
MENU_TEXT = '送返修工单查询'
SCRAP_TIME_INTERVAL = 10  # scraping interval (not referenced by the code below)

IE_DRIVER_PATH = 'D:/develop/sdk/python/Selenium/ie_driver/IEDriverServer.exe'

# NOTE(review): credentials are committed in plain text. They are kept here
# only so behavior is unchanged; move them to a secret store or environment
# configuration and rotate the password.
LOGIN_BRANCH_CODE = '3080100'
LOGIN_USER_CODE = '588'
LOGIN_PASSWORD = 'Kane@1982'


def WaitForLogin(wbDriver, tagID, maxWait=None):
    """Block until the element with id ``tagID`` disappears from the page.

    The element is polled once per second. Its disappearance (or a stale
    reference) is taken to mean the page has navigated away from the login
    form.

    Args:
        wbDriver: Selenium WebDriver to poll.
        tagID: id of an element that exists only on the login page.
        maxWait: maximum number of one-second polls before giving up.
            ``None`` (the default) waits indefinitely, which matches the
            previous behavior where the timeout counter was never advanced.

    Returns:
        True once the element is gone, False if ``maxWait`` polls elapsed
        with the element still present.
    """
    waitCount = 0
    while maxWait is None or waitCount < maxWait:
        try:
            wbDriver.find_element(By.ID, tagID)
        except (StaleElementReferenceException, NoSuchElementException):
            # Element no longer there: the page has moved on.
            return True
        print('等待用户登录!')
        time.sleep(1)
        waitCount += 1
    print('骑上我的小摩托!!!!')
    return False


def GetSessionID(webDriver):
    """Return the first browser cookie formatted as ``name=value``.

    Only the first cookie reported by the driver is used; presumably that is
    the session cookie -- TODO confirm against the site.

    Returns:
        The ``name=value`` string, or None if no cookie could be read.
    """
    try:
        cookie = webDriver.get_cookies()[0]
        return cookie['name'] + '=' + cookie['value']
    except Exception:
        # Best effort: no cookies, malformed cookie, or driver failure.
        return None


def _HoverAndClick(action, element):
    """Hover over ``element``, then right-click and left-click it.

    The exact call sequence of the original script is preserved on the
    shared ActionChains instance.
    """
    action.move_to_element_with_offset(element, 1, 1).perform()
    action.context_click(element).perform()
    action.click(element)
    action.perform()


def _Login(wb_ie):
    """Fill in the login form and wait for the user to complete the login.

    Raises:
        NoSuchElementException: if a login-form element cannot be located.
    """
    # Looked up only to verify that the login page (with its captcha) loaded.
    wb_ie.find_element(By.ID, 'captcha')
    branchCode = wb_ie.find_element(By.ID, 'branchCode')
    userCode = wb_ie.find_element(By.ID, 'userCode')
    password = wb_ie.find_element(By.ID, 'password')

    # Retry until the form was filled and the login wait ended without error.
    while True:
        try:
            branchCode.clear()
            userCode.clear()
            password.clear()
            branchCode.send_keys(LOGIN_BRANCH_CODE)
            userCode.send_keys(LOGIN_USER_CODE)
            password.send_keys(LOGIN_PASSWORD)
            WaitForLogin(wb_ie, 'captcha')
        except Exception as e:
            print(e)
            # Pause so a persistent failure does not become a busy loop.
            time.sleep(1)
        else:
            break


def _RunQueryLoop(wb_ie, startDate, endDate, queryInterval):
    """Open the work-order query page and re-run the query forever."""
    action = ActionChains(wb_ie)

    # Click the statistics-query button, then the repair work-order menu item.
    element = wb_ie.find_element(By.XPATH, "//*[text()='%s']" % BUTTON_TEXT)
    _HoverAndClick(action, element)
    element = wb_ie.find_element(By.XPATH, "//*[text()='%s']" % MENU_TEXT)
    _HoverAndClick(action, element)

    # The query form lives inside this iframe.
    wb_ie.switch_to.frame('tabset_workOrderQuery')

    queryButton = wb_ie.find_element(By.XPATH, "//*[text()='查询']")
    startDateElement = wb_ie.find_element(By.ID, 'createStartDate')
    endDateElement = wb_ie.find_element(By.ID, 'createEndDate')
    startDateElement.send_keys(startDate)
    endDateElement.send_keys(endDate)

    # Query / scrape loop: runs until an exception or interrupt ends it.
    while True:
        action.move_to_element_with_offset(queryButton, 2, 2).perform()
        action.click(queryButton).perform()
        # Wait before issuing the next query.
        time.sleep(queryInterval)


def ScrapRepairOrderData(startDate='2019-11-10', endDate='2019-12-10',
                         queryInterval=300):
    """Log in to the repair system and repeatedly run the work-order query.

    Starts Internet Explorer through Selenium, fills in the login form, waits
    for the user to finish logging in, navigates to the repair work-order
    query page and then re-submits the query every ``queryInterval`` seconds.

    This function does not return normally: it runs until an error occurs
    (the process then exits with status -1) or it is interrupted. The browser
    session is always shut down on the way out.

    Args:
        startDate: value typed into the ``createStartDate`` field.
        endDate: value typed into the ``createEndDate`` field.
        queryInterval: seconds to sleep between two queries.

    Raises:
        SystemExit: with code -1 when the driver or page cannot be opened, a
            required element is missing, or the page fails while querying.
    """
    # Start the driver.
    try:
        wb_ie = webdriver.Ie(executable_path=IE_DRIVER_PATH)
    except Exception:
        print('打开driver和网页时出错!')
        sys.exit(-1)

    try:
        # Open the login page.
        try:
            wb_ie.get(SITE_LINK + 'login.jsp')
        except Exception:
            print('打开driver和网页时出错!')
            sys.exit(-1)

        # Fill in the login information.
        try:
            _Login(wb_ie)
        except NoSuchElementException:
            print('获取登录页面元素失败!')
            sys.exit(-1)

        # Session cookie string; nothing consumes it yet -- presumably meant
        # for a later scraping step (TODO confirm).
        sessionID = GetSessionID(wb_ie)

        try:
            _RunQueryLoop(wb_ie, startDate, endDate, queryInterval)
        except NoSuchElementException:
            print('查找不到元素')
            sys.exit(-1)
        except Exception:
            # KeyboardInterrupt / SystemExit deliberately pass through.
            print('页面读取错误')
            sys.exit(-1)
    finally:
        # quit() ends the whole session, including the IEDriverServer
        # process; close() would only close the current window.
        wb_ie.quit()


# Test entry point
if __name__ == '__main__':
    ScrapRepairOrderData()