import os
import re
import time
from datetime import datetime

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
# Defaults that were previously hard-coded inside getsinareport().
_DEFAULT_DRIVER_PATH = r'C:\Program Files\Google\Chrome\Application\chromedriver.exe'
_DEFAULT_REPORT_ROOT = "D:\\code\\liar\\report\\"


def _launch_chrome(driverpath):
    """Start a headless Chrome session using the chromedriver at *driverpath*."""
    options = webdriver.ChromeOptions()
    # Run the browser in headless (no visible window) mode.
    options.add_argument('--headless')
    options.add_argument('--disable-gpu')
    # The options must be handed to the constructor; building them after the
    # driver already exists has no effect.
    return webdriver.Chrome(executable_path=driverpath, options=options)


def _download_report(href, filename, driverpath):
    """Open the report page at *href* and append its text to *filename*.

    Raises:
        IndexError: if the page has no ``div.blk_container > p`` element.
    """
    browser = _launch_chrome(driverpath)
    try:
        browser.implicitly_wait(1)
        browser.get(href)
        headings = browser.find_elements(By.XPATH, '//div[@class="content"]/h1')
        spans = browser.find_elements(By.XPATH, '//div[@class="creab"]/span')
        body = browser.find_elements(By.XPATH, '//div[@class="blk_container"]/p')[0]

        # Read every piece of text while the browser is still alive, so the
        # output file is only touched once the page was scraped successfully.
        parts = []
        if headings:
            parts.append(headings[0].text + '\n')
        for span in spans:
            print(span.text)
            parts.append(span.text)
        print(body.text)
        parts.append(body.text)
    finally:
        # Always release the Chrome process, even when scraping fails.
        browser.quit()

    # Explicit UTF-8 so the write does not depend on the platform's default
    # code page.
    with open(filename, "a", encoding="utf-8") as report_file:
        report_file.write("".join(parts))


def getsinareport(code, endyear, driverpath=_DEFAULT_DRIVER_PATH,
                  report_root=_DEFAULT_REPORT_ROOT):
    """Download Sina Finance research reports for one stock into text files.

    Opens the report list page, searches for *code*, then walks the result
    pages. For each listed report the detail page is opened in a separate
    browser and its heading, ``creab`` spans and first body paragraph are
    written to ``<report_root>/<code>/<date><title>.txt`` (UTF-8).

    Paging stops at the first report dated before *endyear*; this relies on
    the list being ordered newest-first (assumption, as in the original
    early ``break`` -- TODO confirm). When a report's file already exists the
    rest of the current page is skipped and paging continues.

    Args:
        code: Stock code typed into the search box; also the name of the
            output sub-directory.
        endyear: Cutoff date string in ``%Y-%m-%d`` format.
        driverpath: Path to the chromedriver executable.
        report_root: Directory under which the per-stock folder is created.

    Raises:
        ValueError: if *endyear* or a listed date is not ``%Y-%m-%d``.
        IndexError: if a report page has no body paragraph.
    """
    url = 'http://stock.finance.sina.com.cn/stock/go.php/vReport_List/kind/lastest/index.phtml'
    # Parse the cutoff first so a malformed date fails before any browser starts.
    cutoff = datetime.strptime(endyear, '%Y-%m-%d')
    # Characters replaced in file names: / \ : * ? " < > |
    illegal_chars = re.compile(r"[\/\\\:\*\?\"\<\>\|]")
    directory = os.path.join(report_root, code)

    driver = _launch_chrome(driverpath)
    try:
        driver.get(url)
        driver.find_element(By.CSS_SELECTOR, "input#t1_2").click()
        search_box = driver.find_element(By.ID, "symbol")
        search_box.send_keys(code)
        search_box.send_keys(Keys.ENTER)
        time.sleep(3)  # give the result list time to load

        reached_cutoff = False
        while not reached_cutoff:
            # Every result row contributes six <td> cells; index 1 and 3 are
            # combined into the file name and index 3 is parsed as the date.
            cells = driver.find_elements(By.XPATH, '//td[@class="tal f14"]/../td')
            titles = []
            for start in range(0, len(cells) - 5, 6):  # skip an incomplete trailing row
                print(cells[start].text)
                date_text = cells[start + 3].text
                title = illegal_chars.sub("_", date_text + cells[start + 1].text)
                print(title)
                titles.append((title, date_text))

            links = driver.find_elements(By.XPATH, '//td[@class="tal f14"]/a')
            for link, (title, date_text) in zip(links, titles):
                href = link.get_attribute('href')
                print(href)
                # NOTE(review): mkdir/mkfile are defined outside this block;
                # mkdir presumably creates the directory when missing -- confirm.
                mkdir(directory)
                filename = os.path.join(directory, title + '.txt')
                published = datetime.strptime(date_text, '%Y-%m-%d')
                print(published, cutoff)
                if published < cutoff:
                    # Older than the cutoff: stop the whole crawl instead of
                    # visiting every remaining page for nothing.
                    reached_cutoff = True
                    break
                # NOTE(review): a truthy mkfile() result is treated as "file
                # already existed" (as the original did) -- confirm.
                if mkfile(filename):
                    break
                time.sleep(10)  # throttle requests to the detail pages
                _download_report(href, filename, driverpath)

            if reached_cutoff:
                break
            next_buttons = driver.find_elements(By.XPATH, '//span[@class="pagebox_next"]')
            if not next_buttons:
                break
            next_buttons[0].click()
            print('next---------------------------------->')
            time.sleep(3)  # let the next page load before reading its rows
    finally:
        # Always release the main Chrome process.
        driver.quit()
    print('DONE!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!')
if __name__ == '__main__':
    # Script entry point: run the scraper once with a hard-coded stock code
    # and date string.
    getsinareport(code='sh600212', endyear='2013-04-28')