car_deal_util/代码/python/车商渠道数据采集/RepairOrderScrap/RepairOrderScrap.py

154 lines
3.9 KiB
Python
Raw Normal View History

2019-12-10 09:54:37 +00:00
#送返修数据采集
import time
from urllib.error import HTTPError
from urllib.error import URLError

from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.common.exceptions import StaleElementReferenceException
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.remote.webelement import WebElement
# Constants
SITE_LINK = "http://10.190.48.74:8601/"  # root URL of the target site
IMAGE_LINK = "images/repairMissionQuery.png"  # relative path of an image resource
BUTTON_TEXT = "统计查询"  # label text, roughly "statistics query"
MENU_TEXT = "送返修工单查询"  # label text, roughly "repair work-order query"
SCRAP_TIME_INTERVAL = 10  # interval between scrapes (presumably seconds)
def WaitForLogin(wbDriver, tagID, maxCount=None):
    """Block until the element with id ``tagID`` can no longer be located.

    The page is polled once per second. While the element is still found, a
    "waiting for login" message is printed. As soon as the lookup raises
    ``NoSuchElementException`` or ``StaleElementReferenceException`` the page
    is taken to have navigated away and the function returns.

    Args:
        wbDriver: Selenium WebDriver instance to poll.
        tagID: ``id`` attribute of the element whose disappearance signals
            that the login page has been left.
        maxCount: Optional poll limit. ``None`` (the default) waits
            indefinitely. With an integer, the function gives up once more
            than ``maxCount`` polls have found the element, prints a notice
            and returns.

    Returns:
        None in every case; callers cannot distinguish a timeout from a
        successful page change.
    """
    waitCount = 0
    while maxCount is None or waitCount <= maxCount:
        try:
            # find_element(By.ID, ...) is available in Selenium 3 and 4,
            # unlike the removed find_element_by_id helper.
            wbDriver.find_element(By.ID, tagID)
        except (StaleElementReferenceException, NoSuchElementException):
            # The element is gone: the page has changed.
            return
        print('等待用户登录!')
        time.sleep(1)
        waitCount += 1
    # Poll limit exceeded.
    print('骑上我的小摩托!!!!')
# Start the IE driver and open the login page.
# NOTE(review): ``executable_path`` is deprecated in Selenium 4 in favour of a
# Service object -- confirm the installed Selenium version before upgrading.
wb_ie = None
try:
    wb_ie = webdriver.Ie(executable_path='D:/develop/sdk/python/Selenium/ie_driver/IEDriverServer.exe')
    # Build the login URL from SITE_LINK so the host is defined in one place.
    wb_ie.get(SITE_LINK + 'login.jsp')
except Exception:
    print('打开driver和网页时出错')
    # If the driver started but the page load failed, shut the driver down
    # instead of leaving it running.
    if wb_ie is not None:
        wb_ie.quit()
    # SystemExit is raised directly; the ``exit`` helper is only injected by
    # the ``site`` module and is not guaranteed to exist.
    raise SystemExit(-1)
# Shared ActionChains instance used for the mouse interactions further down.
action = ActionChains(wb_ie)
# Parse the login page source and collect the <img id="captcha"> tags.
bs = BeautifulSoup(wb_ie.page_source, 'html.parser')
names = bs.find_all("img", {'id': 'captcha'})
# Fill in the login form. The captcha element is only located, never typed
# into -- presumably the user enters it by hand.
try:
    captcha = wb_ie.find_element(By.ID, 'captcha')
    branchCode = wb_ie.find_element(By.ID, 'branchCode')
    userCode = wb_ie.find_element(By.ID, 'userCode')
    password = wb_ie.find_element(By.ID, 'password')
    # SECURITY NOTE(review): credentials are hard-coded in source control;
    # consider loading them from the environment or a local config file.
    branchCode.send_keys('3080100')
    userCode.send_keys('588')
    password.send_keys('Kane@1982')
except NoSuchElementException:
    print('获取登录页面元素失败!')
    # quit() ends the whole driver session; close() would only close the
    # current window and leave the driver process behind.
    wb_ie.quit()
    raise SystemExit(-1)
# Wait until the captcha element disappears, i.e. the login page was left.
# WaitForLogin handles NoSuchElementException itself, so that exception never
# reaches this point. Catch the WebDriverException base class so that a
# closed or crashed browser is actually reported.
try:
    WaitForLogin(wb_ie, 'captcha')
except WebDriverException:
    print('浏览器异常关闭!')
    # Abnormal termination: exit with a non-zero status.
    raise SystemExit(-1)
# Print the first cookie of the session as "name=value".
try:
    cookies = wb_ie.get_cookies()
    cookie_dict = dict(cookies[0])
    print(cookie_dict['name'] + '=' + cookie_dict['value'])
except (IndexError, KeyError, WebDriverException) as err:
    # IndexError: no cookies at all; KeyError: cookie lacks name/value;
    # WebDriverException: the browser could not be queried.
    # Report the failure instead of exiting silently with status 0.
    print('获取cookie失败!', repr(err))
    wb_ie.quit()
    raise SystemExit(-1)
def _hover_and_click(chain, target):
    """Hover over ``target``, right-click it, then left-click it via ``chain``."""
    chain.move_to_element_with_offset(target, 1, 1).perform()
    chain.context_click(target).perform()
    chain.click(target)
    chain.perform()


# NOTE(review): the single ``action`` chain is reused for every interaction
# and never reset. Depending on the Selenium version, perform() may replay
# previously queued actions -- confirm before switching to per-step chains.
try:
    # Open the BUTTON_TEXT menu, then the MENU_TEXT entry beneath it.
    element = wb_ie.find_element(By.XPATH, "//*[text()='{}']".format(BUTTON_TEXT))
    _hover_and_click(action, element)
    element = wb_ie.find_element(By.XPATH, "//*[text()='{}']".format(MENU_TEXT))
    _hover_and_click(action, element)

    # The query form is looked up inside the tabset_workOrderQuery iframe.
    wb_ie.switch_to.frame('tabset_workOrderQuery')
    # Query button and the two date inputs.
    queryButton = wb_ie.find_element(By.XPATH, "//*[text()='查询']")
    startDateElement = wb_ie.find_element(By.ID, 'createStartDate')
    endDateElement = wb_ie.find_element(By.ID, 'createEndDate')

    # Query/scrape loop: re-enter the date range and press the query button.
    while True:
        startDateElement.clear()
        endDateElement.clear()
        startDateElement.send_keys('2019-11-10')
        endDateElement.send_keys('2019-12-10')
        action.move_to_element_with_offset(queryButton, 2, 2).perform()
        action.click(queryButton).perform()
        # Pause between queries so the server is not hit in a tight loop.
        time.sleep(SCRAP_TIME_INTERVAL)
except NoSuchElementException:
    print('查找不到元素')
    raise SystemExit(-1)
except Exception:
    # ``Exception`` rather than a bare ``except`` so that Ctrl-C
    # (KeyboardInterrupt) is not reported as a page read error.
    # NOTE(review): the driver is not closed on these exit paths -- confirm
    # whether wb_ie.quit() should be called here.
    print('页面读取错误')
    raise SystemExit(-1)

# NOTE(review): not reached -- the query loop above only ends by leaving the
# script. Kept as-is pending a decision on the intended flow.
while True:
    time.sleep(1)
    print('已登录!!!')