mirror of
https://github.com/opelly27/nasdaq_finance.git
synced 2026-05-20 08:37:43 +00:00
105 lines
3.5 KiB
Python
105 lines
3.5 KiB
Python
#!/usr/bin/env python
|
|
# -*- coding: utf-8 -*-
|
|
|
|
from lxml import html
|
|
import requests
|
|
from time import sleep
|
|
import json
|
|
import argparse
|
|
from random import randint
|
|
|
|
def _first_stripped(matches, default=None):
    """Return the first XPath text match with surrounding whitespace removed.

    Args:
        matches (list): Text results of an XPath query (possibly empty).
        default: Value to return when the query matched nothing.

    Returns:
        The stripped first match, or ``default`` when ``matches`` is empty.
    """
    return matches[0].strip() if matches else default


def parse_finance_page(ticker):
    """
    Grab financial data from NASDAQ page

    The page is requested up to five times. Any failure (network error,
    non-200 status, parsing error) is printed and the request is retried
    after a short random pause.

    Args:
        ticker (str): Stock symbol

    Returns:
        dict: Scraped data with the keys ``company_name``, ``ticker``,
        ``url``, ``open price``, ``open_date``, ``close_price``,
        ``close_date`` and ``key_stock_data``. ``None`` is returned when
        every attempt failed.
    """
    headers = {
        "Accept":"text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
        "Accept-Encoding":"gzip, deflate",
        "Accept-Language":"en-GB,en;q=0.9,en-US;q=0.8,ml;q=0.7",
        "Connection":"keep-alive",
        "Host":"www.nasdaq.com",
        "Referer":"http://www.nasdaq.com",
        "Upgrade-Insecure-Requests":"1",
        "User-Agent":"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.119 Safari/537.36"
    }

    # The URL does not change between attempts, so build it once.
    url = "http://www.nasdaq.com/symbol/%s"%(ticker)
    max_attempts = 5
    # Seconds to wait for the server; without a timeout a stalled
    # connection would block forever and the retry loop would never advance.
    request_timeout = 30

    xpath_head = "//div[contains(@id,'pageheader')]//h1//text()"
    xpath_key_stock_table = '//div[contains(@class,"overview-results")]//div[contains(@class,"table-table")]/div'
    xpath_open_price = '//b[contains(text(),"Open Price:")]/following-sibling::span/text()'
    xpath_open_date = '//b[contains(text(),"Open Date:")]/following-sibling::span/text()'
    xpath_close_price = '//b[contains(text(),"Close Price:")]/following-sibling::span/text()'
    xpath_close_date = '//b[contains(text(),"Close Date:")]/following-sibling::span/text()'
    xpath_key = './/div[@class="table-cell"]/b/text()'
    xpath_value = './/div[@class="table-cell"]/text()'

    # Retrying for failed request
    for attempt in range(max_attempts):
        try:
            # Certificate verification is left at the library default
            # (enabled) so HTTPS redirects are validated.
            response = requests.get(url, headers=headers, timeout=request_timeout)

            if response.status_code != 200:
                raise ValueError("Invalid Response Received From Webserver")

            print("Parsing %s"%(url))
            # Adding random delay
            sleep(randint(1,3))
            parser = html.fromstring(response.text)

            raw_name = parser.xpath(xpath_head)
            company_name = raw_name[0].replace("Common Stock Quote & Summary Data","").strip() if raw_name else ''

            # Grabbing and cleaning keystock data.
            # The dict is rebuilt on every attempt so a partially processed
            # page from a failed attempt cannot leak into a later result.
            key_stock_dict = {}
            for row in parser.xpath(xpath_key_stock_table):
                key = ''.join(row.xpath(xpath_key)).strip()
                # Collapse all runs of whitespace in the value to single spaces.
                value = ' '.join(''.join(row.xpath(xpath_value)).split())
                key_stock_dict[key] = value

            return {
                "company_name":company_name,
                "ticker":ticker,
                "url":url,
                # NOTE: this key contains a space (unlike its siblings); it is
                # kept as-is because it is part of the emitted data format.
                "open price":_first_stripped(parser.xpath(xpath_open_price)),
                "open_date":_first_stripped(parser.xpath(xpath_open_date)),
                "close_price":_first_stripped(parser.xpath(xpath_close_price)),
                "close_date":_first_stripped(parser.xpath(xpath_close_date)),
                "key_stock_data":key_stock_dict
            }

        except Exception as e:
            # Deliberately broad: this function is best-effort and reports
            # failures instead of raising them to the caller.
            print("Failed to process the request, Exception:%s"%(e))
            # Pause before the next attempt instead of retrying immediately.
            if attempt < max_attempts - 1:
                sleep(randint(1,3))

    # Every attempt failed.
    return None
|
|
|
|
if __name__=="__main__":
    # Command-line entry point: scrape one ticker and store the result as
    # "<ticker>-summary.json" in the current working directory.
    argparser = argparse.ArgumentParser()
    argparser.add_argument('ticker',help = 'Company stock symbol')
    args = argparser.parse_args()
    ticker = args.ticker
    print("Fetching data for %s"%(ticker))
    scraped_data = parse_finance_page(ticker)

    # parse_finance_page returns None when all of its attempts failed.
    # Stop with a non-zero exit status instead of writing a file that
    # contains only "null".
    if scraped_data is None:
        raise SystemExit("No data could be scraped for %s, output file not written"%(ticker))

    print("Writing scraped data to output file")

    with open('%s-summary.json'%(ticker),'w') as fp:
        json.dump(scraped_data,fp,indent = 4,ensure_ascii=False)
|