# -*- coding:utf-8 -*-
"""
基本面数据接口
Created on 2015/01/18
@author: Jimmy Liu
@group : waditu
@contact: jimmysoa@sina.cn
"""
import pandas as pd
from tushare.stock import cons as ct
import lxml.html
from lxml import etree
import re
import time
v = pd.__version__
if int(v.split('.')[1])>=25 or int(v.split('.')[0])>0:
from io import StringIO
else:
from pandas.compat import StringIO
from tushare.util import dateu as du
try:
from urllib.request import urlopen, Request
except ImportError:
from urllib2 import urlopen, Request
def get_stock_basics(date=None):
"""
获取沪深上市公司基本情况
Parameters
date:日期YYYY-MM-DD,默认为上一个交易日,目前只能提供2016-08-09之后的历史数据
Return
--------
DataFrame
code,代码
name,名称
industry,细分行业
area,地区
pe,市盈率
outstanding,流通股本
totals,总股本(万)
totalAssets,总资产(万)
liquidAssets,流动资产
fixedAssets,固定资产
reserved,公积金
reservedPerShare,每股公积金
eps,每股收益
bvps,每股净资
pb,市净率
timeToMarket,上市日期
"""
print("本接口即将停止更新,请尽快使用Pro版接口:https://tushare.pro/document/2")
wdate = du.last_tddate() if date is None else date
wdate = wdate.replace('-', '')
if wdate < '20160809':
return None
datepre = '' if date is None else wdate[0:4] + wdate[4:6] + '/'
request = Request(ct.ALL_STOCK_BASICS_FILE%(datepre, '' if date is None else wdate))
text = urlopen(request, timeout=10).read()
text = text.decode('GBK')
text = text.replace('--', '')
df = pd.read_csv(StringIO(text), dtype={'code':'object'})
df = df.set_index('code')
return df
def get_report_data(year, quarter):
"""
获取业绩报表数据
Parameters
--------
year:int 年度 e.g:2014
quarter:int 季度 :1、2、3、4,只能输入这4个季度
说明:由于是从网站获取的数据,需要一页页抓取,速度取决于您当前网络速度
Return
--------
DataFrame
code,代码
name,名称
eps,每股收益
eps_yoy,每股收益同比(%)
bvps,每股净资产
roe,净资产收益率(%)
epcf,每股现金流量(元)
net_profits,净利润(万元)
profits_yoy,净利润同比(%)
distrib,分配方案
report_date,发布日期
"""
print("本接口即将停止更新,请尽快使用Pro版接口:https://tushare.pro/document/2")
if ct._check_input(year,quarter) is True:
ct._write_head()
df = _get_report_data(year, quarter, 1, pd.DataFrame())
if df is not None:
# df = df.drop_duplicates('code')
df['code'] = df['code'].map(lambda x:str(x).zfill(6))
return df
def _get_report_data(year, quarter, pageNo, dataArr,
retry_count=3, pause=0.001):
ct._write_console()
for _ in range(retry_count):
time.sleep(pause)
try:
request = Request(ct.REPORT_URL%(ct.P_TYPE['http'], ct.DOMAINS['vsf'], ct.PAGES['fd'],
year, quarter, pageNo, ct.PAGE_NUM[1]))
text = urlopen(request, timeout=10).read()
text = text.decode('GBK')
text = text.replace('--', '')
html = lxml.html.parse(StringIO(text))
res = html.xpath("//table[@class=\"list_table\"]/tr")
if ct.PY3:
sarr = [etree.tostring(node).decode('utf-8') for node in res]
else:
sarr = [etree.tostring(node) for node in res]
sarr = ''.join(sarr)
sarr = '
'%sarr
df = pd.read_html(sarr)[0]
df = df.drop(11, axis=1)
df.columns = ct.REPORT_COLS
dataArr = dataArr.append(df, ignore_index=True)
nextPage = html.xpath('//div[@class=\"pages\"]/a[last()]/@onclick')
if len(nextPage)>0:
pageNo = re.findall(r'\d+', nextPage[0])[0]
return _get_report_data(year, quarter, pageNo, dataArr)
else:
return dataArr
except Exception as e:
pass
raise IOError(ct.NETWORK_URL_ERROR_MSG)
def get_profit_data(year, quarter):
"""
获取盈利能力数据
Parameters
--------
year:int 年度 e.g:2014
quarter:int 季度 :1、2、3、4,只能输入这4个季度
说明:由于是从网站获取的数据,需要一页页抓取,速度取决于您当前网络速度
Return
--------
DataFrame
code,代码
name,名称
roe,净资产收益率(%)
net_profit_ratio,净利率(%)
gross_profit_rate,毛利率(%)
net_profits,净利润(万元)
eps,每股收益
business_income,营业收入(百万元)
bips,每股主营业务收入(元)
"""
print("本接口即将停止更新,请尽快使用Pro版接口:https://tushare.pro/document/2")
if ct._check_input(year, quarter) is True:
ct._write_head()
data = _get_profit_data(year, quarter, 1, pd.DataFrame())
if data is not None:
# data = data.drop_duplicates('code')
data['code'] = data['code'].map(lambda x:str(x).zfill(6))
return data
def _get_profit_data(year, quarter, pageNo, dataArr,
retry_count=3, pause=0.001):
ct._write_console()
for _ in range(retry_count):
time.sleep(pause)
try:
request = Request(ct.PROFIT_URL%(ct.P_TYPE['http'], ct.DOMAINS['vsf'],
ct.PAGES['fd'], year,
quarter, pageNo, ct.PAGE_NUM[1]))
text = urlopen(request, timeout=10).read()
text = text.decode('GBK')
text = text.replace('--', '')
html = lxml.html.parse(StringIO(text))
res = html.xpath("//table[@class=\"list_table\"]/tr")
if ct.PY3:
sarr = [etree.tostring(node).decode('utf-8') for node in res]
else:
sarr = [etree.tostring(node) for node in res]
sarr = ''.join(sarr)
sarr = ''%sarr
df = pd.read_html(sarr)[0]
df.columns=ct.PROFIT_COLS
dataArr = dataArr.append(df, ignore_index=True)
nextPage = html.xpath('//div[@class=\"pages\"]/a[last()]/@onclick')
if len(nextPage)>0:
pageNo = re.findall(r'\d+', nextPage[0])[0]
return _get_profit_data(year, quarter, pageNo, dataArr)
else:
return dataArr
except:
pass
raise IOError(ct.NETWORK_URL_ERROR_MSG)
def get_operation_data(year, quarter):
"""
获取营运能力数据
Parameters
--------
year:int 年度 e.g:2014
quarter:int 季度 :1、2、3、4,只能输入这4个季度
说明:由于是从网站获取的数据,需要一页页抓取,速度取决于您当前网络速度
Return
--------
DataFrame
code,代码
name,名称
arturnover,应收账款周转率(次)
arturndays,应收账款周转天数(天)
inventory_turnover,存货周转率(次)
inventory_days,存货周转天数(天)
currentasset_turnover,流动资产周转率(次)
currentasset_days,流动资产周转天数(天)
"""
print("本接口即将停止更新,请尽快使用Pro版接口:https://tushare.pro/document/2")
if ct._check_input(year, quarter) is True:
ct._write_head()
data = _get_operation_data(year, quarter, 1, pd.DataFrame())
if data is not None:
# data = data.drop_duplicates('code')
data['code'] = data['code'].map(lambda x:str(x).zfill(6))
return data
def _get_operation_data(year, quarter, pageNo, dataArr,
retry_count=3, pause=0.001):
ct._write_console()
for _ in range(retry_count):
time.sleep(pause)
try:
request = Request(ct.OPERATION_URL%(ct.P_TYPE['http'], ct.DOMAINS['vsf'],
ct.PAGES['fd'], year,
quarter, pageNo, ct.PAGE_NUM[1]))
text = urlopen(request, timeout=10).read()
text = text.decode('GBK')
text = text.replace('--', '')
html = lxml.html.parse(StringIO(text))
res = html.xpath("//table[@class=\"list_table\"]/tr")
if ct.PY3:
sarr = [etree.tostring(node).decode('utf-8') for node in res]
else:
sarr = [etree.tostring(node) for node in res]
sarr = ''.join(sarr)
sarr = ''%sarr
df = pd.read_html(sarr)[0]
df.columns=ct.OPERATION_COLS
dataArr = dataArr.append(df, ignore_index=True)
nextPage = html.xpath('//div[@class=\"pages\"]/a[last()]/@onclick')
if len(nextPage)>0:
pageNo = re.findall(r'\d+', nextPage[0])[0]
return _get_operation_data(year, quarter, pageNo, dataArr)
else:
return dataArr
except Exception as e:
pass
raise IOError(ct.NETWORK_URL_ERROR_MSG)
def get_growth_data(year, quarter):
"""
获取成长能力数据
Parameters
--------
year:int 年度 e.g:2014
quarter:int 季度 :1、2、3、4,只能输入这4个季度
说明:由于是从网站获取的数据,需要一页页抓取,速度取决于您当前网络速度
Return
--------
DataFrame
code,代码
name,名称
mbrg,主营业务收入增长率(%)
nprg,净利润增长率(%)
nav,净资产增长率
targ,总资产增长率
epsg,每股收益增长率
seg,股东权益增长率
"""
print("本接口即将停止更新,请尽快使用Pro版接口:https://tushare.pro/document/2")
if ct._check_input(year, quarter) is True:
ct._write_head()
data = _get_growth_data(year, quarter, 1, pd.DataFrame())
if data is not None:
# data = data.drop_duplicates('code')
data['code'] = data['code'].map(lambda x:str(x).zfill(6))
return data
def _get_growth_data(year, quarter, pageNo, dataArr,
retry_count=3, pause=0.001):
ct._write_console()
for _ in range(retry_count):
time.sleep(pause)
try:
request = Request(ct.GROWTH_URL%(ct.P_TYPE['http'], ct.DOMAINS['vsf'],
ct.PAGES['fd'], year,
quarter, pageNo, ct.PAGE_NUM[1]))
text = urlopen(request, timeout=50).read()
text = text.decode('GBK')
text = text.replace('--', '')
html = lxml.html.parse(StringIO(text))
res = html.xpath("//table[@class=\"list_table\"]/tr")
if ct.PY3:
sarr = [etree.tostring(node).decode('utf-8') for node in res]
else:
sarr = [etree.tostring(node) for node in res]
sarr = ''.join(sarr)
sarr = ''%sarr
df = pd.read_html(sarr)[0]
df.columns=ct.GROWTH_COLS
dataArr = dataArr.append(df, ignore_index=True)
nextPage = html.xpath('//div[@class=\"pages\"]/a[last()]/@onclick')
if len(nextPage)>0:
pageNo = re.findall(r'\d+', nextPage[0])[0]
return _get_growth_data(year, quarter, pageNo, dataArr)
else:
return dataArr
except Exception as e:
pass
raise IOError(ct.NETWORK_URL_ERROR_MSG)
def get_debtpaying_data(year, quarter):
"""
获取偿债能力数据
Parameters
--------
year:int 年度 e.g:2014
quarter:int 季度 :1、2、3、4,只能输入这4个季度
说明:由于是从网站获取的数据,需要一页页抓取,速度取决于您当前网络速度
Return
--------
DataFrame
code,代码
name,名称
currentratio,流动比率
quickratio,速动比率
cashratio,现金比率
icratio,利息支付倍数
sheqratio,股东权益比率
adratio,股东权益增长率
"""
print("本接口即将停止更新,请尽快使用Pro版接口:https://tushare.pro/document/2")
if ct._check_input(year, quarter) is True:
ct._write_head()
df = _get_debtpaying_data(year, quarter, 1, pd.DataFrame())
if df is not None:
# df = df.drop_duplicates('code')
df['code'] = df['code'].map(lambda x:str(x).zfill(6))
return df
def _get_debtpaying_data(year, quarter, pageNo, dataArr,
retry_count=3, pause=0.001):
ct._write_console()
for _ in range(retry_count):
time.sleep(pause)
try:
request = Request(ct.DEBTPAYING_URL%(ct.P_TYPE['http'], ct.DOMAINS['vsf'],
ct.PAGES['fd'], year,
quarter, pageNo, ct.PAGE_NUM[1]))
text = urlopen(request, timeout=10).read()
text = text.decode('GBK')
html = lxml.html.parse(StringIO(text))
res = html.xpath("//table[@class=\"list_table\"]/tr")
if ct.PY3:
sarr = [etree.tostring(node).decode('utf-8') for node in res]
else:
sarr = [etree.tostring(node) for node in res]
sarr = ''.join(sarr)
sarr = ''%sarr
df = pd.read_html(sarr)[0]
df.columns = ct.DEBTPAYING_COLS
dataArr = dataArr.append(df, ignore_index=True)
nextPage = html.xpath('//div[@class=\"pages\"]/a[last()]/@onclick')
if len(nextPage)>0:
pageNo = re.findall(r'\d+', nextPage[0])[0]
return _get_debtpaying_data(year, quarter, pageNo, dataArr)
else:
return dataArr
except Exception as e:
pass
raise IOError(ct.NETWORK_URL_ERROR_MSG)
def get_cashflow_data(year, quarter):
"""
获取现金流量数据
Parameters
--------
year:int 年度 e.g:2014
quarter:int 季度 :1、2、3、4,只能输入这4个季度
说明:由于是从网站获取的数据,需要一页页抓取,速度取决于您当前网络速度
Return
--------
DataFrame
code,代码
name,名称
cf_sales,经营现金净流量对销售收入比率
rateofreturn,资产的经营现金流量回报率
cf_nm,经营现金净流量与净利润的比率
cf_liabilities,经营现金净流量对负债比率
cashflowratio,现金流量比率
"""
print("本接口即将停止更新,请尽快使用Pro版接口:https://tushare.pro/document/2")
if ct._check_input(year, quarter) is True:
ct._write_head()
df = _get_cashflow_data(year, quarter, 1, pd.DataFrame())
if df is not None:
# df = df.drop_duplicates('code')
df['code'] = df['code'].map(lambda x:str(x).zfill(6))
return df
def _get_cashflow_data(year, quarter, pageNo, dataArr,
retry_count=3, pause=0.001):
ct._write_console()
for _ in range(retry_count):
time.sleep(pause)
try:
request = Request(ct.CASHFLOW_URL%(ct.P_TYPE['http'], ct.DOMAINS['vsf'],
ct.PAGES['fd'], year,
quarter, pageNo, ct.PAGE_NUM[1]))
text = urlopen(request, timeout=10).read()
text = text.decode('GBK')
text = text.replace('--', '')
html = lxml.html.parse(StringIO(text))
res = html.xpath("//table[@class=\"list_table\"]/tr")
if ct.PY3:
sarr = [etree.tostring(node).decode('utf-8') for node in res]
else:
sarr = [etree.tostring(node) for node in res]
sarr = ''.join(sarr)
sarr = ''%sarr
df = pd.read_html(sarr)[0]
df.columns = ct.CASHFLOW_COLS
dataArr = dataArr.append(df, ignore_index=True)
nextPage = html.xpath('//div[@class=\"pages\"]/a[last()]/@onclick')
if len(nextPage)>0:
pageNo = re.findall(r'\d+', nextPage[0])[0]
return _get_cashflow_data(year, quarter, pageNo, dataArr)
else:
return dataArr
except Exception as e:
pass
raise IOError(ct.NETWORK_URL_ERROR_MSG)
def _data_path():
import os
import inspect
caller_file = inspect.stack()[1][1]
pardir = os.path.abspath(os.path.join(os.path.dirname(caller_file), os.path.pardir))
return os.path.abspath(os.path.join(pardir, os.path.pardir))
def get_balance_sheet(code):
"""
获取某股票的历史所有时期资产负债表
Parameters
--------
code:str 股票代码 e.g:600518
Return
--------
DataFrame
行列名称为中文且数目较多,建议获取数据后保存到本地查看
"""
print("本接口即将停止更新,请尽快使用Pro版接口:https://tushare.pro/document/2")
if code.isdigit():
request = Request(ct.SINA_BALANCESHEET_URL%(code))
text = urlopen(request, timeout=10).read()
text = text.decode('GBK')
text = text.replace('\t\n', '\r\n')
text = text.replace('\t', ',')
df = pd.read_csv(StringIO(text), dtype={'code':'object'})
return df
def get_profit_statement(code):
"""
获取某股票的历史所有时期利润表
Parameters
--------
code:str 股票代码 e.g:600518
Return
--------
DataFrame
行列名称为中文且数目较多,建议获取数据后保存到本地查看
"""
print("本接口即将停止更新,请尽快使用Pro版接口:https://tushare.pro/document/2")
if code.isdigit():
request = Request(ct.SINA_PROFITSTATEMENT_URL%(code))
text = urlopen(request, timeout=10).read()
text = text.decode('GBK')
text = text.replace('\t\n', '\r\n')
text = text.replace('\t', ',')
df = pd.read_csv(StringIO(text), dtype={'code':'object'})
return df
def get_cash_flow(code):
"""
获取某股票的历史所有时期现金流表
Parameters
--------
code:str 股票代码 e.g:600518
Return
--------
DataFrame
行列名称为中文且数目较多,建议获取数据后保存到本地查看
"""
print("本接口即将停止更新,请尽快使用Pro版接口:https://tushare.pro/document/2")
if code.isdigit():
request = Request(ct.SINA_CASHFLOW_URL%(code))
text = urlopen(request, timeout=10).read()
text = text.decode('GBK')
text = text.replace('\t\n', '\r\n')
text = text.replace('\t', ',')
df = pd.read_csv(StringIO(text), dtype={'code':'object'})
return df