car_deal_util/code/python/车商渠道数据采集/RepairOrderScrap/RepairOrderScrap.py

164 lines
4.6 KiB
Python
Raw Permalink Normal View History

2019-12-10 09:54:37 +00:00
#送返修数据采集
import time
from bs4 import BeautifulSoup
from urllib.error import HTTPError
from urllib.error import URLError
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.remote.webelement import WebElement
from selenium.common.exceptions import StaleElementReferenceException
from selenium.common.exceptions import NoSuchElementException
# Constants
# Base URL of the target site; the login page opened by the scraper lives
# directly under it.
SITE_LINK = 'http://10.190.48.74:8601/'
# Relative path of an image on the site.
# NOTE(review): how this image is meant to be used is not shown in this
# file section -- confirm before relying on it.
IMAGE_LINK = 'images/repairMissionQuery.png'
# Visible text of the top-level menu entry ("statistics query").
BUTTON_TEXT = '统计查询'
# Visible text of the sub-menu entry ("send/return-for-repair work order query").
MENU_TEXT = '送返修工单查询'
# Interval between scrapes.
# NOTE(review): the unit is presumably seconds -- TODO confirm.
SCRAP_TIME_INTERVAL = 10 # time interval between scrapes
def WaitForLogin(wbDriver, tagID, maxWaitCount=None):
    """Block until the element with id ``tagID`` can no longer be found.

    The page is polled roughly once per second. As long as the element is
    still present the user is taken to be on the login page; as soon as the
    lookup fails (element missing or stale) the page is assumed to have
    navigated away and the function returns.

    Args:
        wbDriver: Selenium WebDriver used to look the element up.
        tagID: ``id`` attribute of an element that exists on the login page.
        maxWaitCount: Optional upper bound on the number of polls. ``None``
            (the default) waits indefinitely, which matches the previous
            behaviour where the poll counter was never incremented.

    Returns:
        None, both when the element disappears and when the poll limit is
        reached (a message is printed in the latter case).
    """
    waitCount = 0
    while maxWaitCount is None or waitCount < maxWaitCount:
        try:
            # find_element(By.ID, ...) works on Selenium 3 and 4; the
            # find_element_by_id shortcut was removed in Selenium 4.3.
            wbDriver.find_element(By.ID, tagID)
        except (StaleElementReferenceException, NoSuchElementException):
            # The login element is gone: the page has moved on.
            return
        print('等待用户登录!')
        time.sleep(1)
        waitCount += 1
    # Poll limit reached while the login element is still present.
    print('骑上我的小摩托!!!!')
2019-12-10 11:06:05 +00:00
def GetSessionID(webDriver):
    """Return the first browser cookie formatted as ``name=value``.

    Args:
        webDriver: Object exposing ``get_cookies()``, which returns a
            sequence of cookie mappings with ``'name'`` and ``'value'``
            entries.

    Returns:
        The string ``"<name>=<value>"`` built from the first cookie, or
        ``None`` when no cookie is available or the lookup fails.
        NOTE(review): only the first cookie is inspected; whether that is
        always the session cookie depends on the site -- confirm.
    """
    try:
        firstCookie = dict(webDriver.get_cookies()[0])
        return firstCookie['name'] + '=' + firstCookie['value']
    except Exception:
        # Best effort: a missing cookie or driver error yields None.
        # ``Exception`` (not a bare except) lets KeyboardInterrupt and
        # SystemExit propagate instead of being silently swallowed.
        return None
def _FindByText(wbDriver, text):
    """Return the first element that has a text node equal to ``text``."""
    return wbDriver.find_element(By.XPATH, "//*[text()='{}']".format(text))


def _ClickWithMouse(wbDriver, element, offset, rightClickFirst=False):
    """Hover over ``element`` at (offset, offset), then click it.

    Every step uses a fresh ActionChains instance. On Selenium 3 a reused
    chain keeps its queued actions after ``perform()``, so each later
    ``perform()`` would replay all earlier gestures as well.
    """
    ActionChains(wbDriver).move_to_element_with_offset(element, offset, offset).perform()
    if rightClickFirst:
        ActionChains(wbDriver).context_click(element).perform()
    ActionChains(wbDriver).click(element).perform()


def _LogIn(wbDriver):
    """Fill in the login form and wait until the login page is left.

    Raises:
        NoSuchElementException: if one of the form elements is missing.
    """
    # Looked up only to confirm that the captcha element is on the page.
    wbDriver.find_element(By.ID, 'captcha')
    branchCode = wbDriver.find_element(By.ID, 'branchCode')
    userCode = wbDriver.find_element(By.ID, 'userCode')
    password = wbDriver.find_element(By.ID, 'password')
    # Login loop: retried until one pass completes without an error.
    # NOTE(security): the credentials are hard-coded in the source; they
    # should be moved to configuration or environment variables.
    while True:
        try:
            branchCode.clear()
            userCode.clear()
            password.clear()
            branchCode.send_keys('3080100')
            userCode.send_keys('588')
            password.send_keys('Kane@1982')
            # The captcha is typed by the user; wait until it disappears.
            WaitForLogin(wbDriver, 'captcha')
        except Exception as e:
            print(e)
        else:
            break


def ScrapRepairOrderData(startDate='2019-11-10', endDate='2019-12-10', queryInterval=300):
    """Log in to the site and repeatedly run the repair work order query.

    Steps: start Internet Explorer through IEDriverServer, open the login
    page, fill in the login form and wait for the user to finish logging in,
    open the statistics menu and the repair work order query entry, switch
    into the query iframe, enter the creation-date range and then click the
    query button every ``queryInterval`` seconds, forever.

    No result parsing is done here; the loop only re-submits the query.

    Args:
        startDate: Text typed into the ``createStartDate`` field.
        endDate: Text typed into the ``createEndDate`` field.
        queryInterval: Seconds to sleep after each click on the query button.

    Raises:
        SystemExit: with code -1 when the driver cannot be started, a page
            element cannot be found, or any other error occurs while
            driving the page. The function never returns normally.
    """
    import sys  # function-scope import: sys is not imported at module level

    # Start the driver and open the login page.
    wb_ie = None
    try:
        wb_ie = webdriver.Ie(executable_path='D:/develop/sdk/python/Selenium/ie_driver/IEDriverServer.exe')
        wb_ie.get(SITE_LINK + 'login.jsp')
    except Exception as e:
        print('打开driver和网页时出错')
        print(e)
        if wb_ie is not None:
            # The browser started but the page failed to load: release it.
            wb_ie.quit()
        sys.exit(-1)

    try:
        # Fill in the login information.
        try:
            _LogIn(wb_ie)
        except NoSuchElementException:
            print('获取登录页面元素失败!')
            sys.exit(-1)

        # Session cookie of the logged-in browser.
        # NOTE(review): the value is not used anywhere yet.
        sessionID = GetSessionID(wb_ie)

        try:
            # Click the statistics query menu, then the repair work order
            # query entry (hover, right-click, then left-click).
            _ClickWithMouse(wb_ie, _FindByText(wb_ie, BUTTON_TEXT), 1, rightClickFirst=True)
            _ClickWithMouse(wb_ie, _FindByText(wb_ie, MENU_TEXT), 1, rightClickFirst=True)

            # The query form lives inside this iframe.
            wb_ie.switch_to.frame('tabset_workOrderQuery')
            queryButton = _FindByText(wb_ie, '查询')
            startDateElement = wb_ie.find_element(By.ID, 'createStartDate')
            endDateElement = wb_ie.find_element(By.ID, 'createEndDate')

            # The date range is typed once, before the loop starts.
            startDateElement.send_keys(startDate)
            endDateElement.send_keys(endDate)

            # Query loop: submit the query, then wait before the next one.
            while True:
                _ClickWithMouse(wb_ie, queryButton, 2)
                time.sleep(queryInterval)
        except NoSuchElementException:
            print('查找不到元素')
            sys.exit(-1)
        except Exception as e:
            # ``Exception`` (not a bare except) so that Ctrl-C is not
            # misreported as a page error; cleanup still runs below.
            print('页面读取错误')
            print(e)
            sys.exit(-1)
    finally:
        # quit() ends the session and the IEDriverServer process; close()
        # would only close the current window.
        wb_ie.quit()
2019-12-10 09:54:37 +00:00
2019-12-10 11:06:05 +00:00
# Test entry point: run the scraper when this file is executed as a script.
if __name__ == "__main__":
    ScrapRepairOrderData()
2019-12-10 09:54:37 +00:00