# -*- coding:utf-8 -*- """ 投资参考数据接口 Created on 2015/03/21 @author: Jimmy Liu @group : waditu @contact: jimmysoa@sina.cn """ from __future__ import division from tushare.stock import cons as ct from tushare.stock import ref_vars as rv import pandas as pd import numpy as np import time import lxml.html from lxml import etree import re import json v = pd.__version__ if int(v.split('.')[1])>=25 or int(v.split('.')[0])>0: from io import StringIO else: from pandas.compat import StringIO from tushare.util import dateu as du from tushare.util.netbase import Client try: from urllib.request import urlopen, Request except ImportError: from urllib2 import urlopen, Request def profit_data(year=2017, top=25, retry_count=3, pause=0.001): """ 获取分配预案数据 Parameters -------- year:年份 top:取最新n条数据,默认取最近公布的25条 retry_count : int, 默认 3 如遇网络等问题重复执行的次数 pause : int, 默认 0 重复请求数据过程中暂停的秒数,防止请求间隔时间太短出现的问题 returns ------- DataFrame code:股票代码 name:股票名称 year:分配年份 report_date:公布日期 divi:分红金额(每10股) shares:转增和送股数(每10股) """ if top == 'all': ct._write_head() df, pages = _dist_cotent(year, 0, retry_count, pause) for idx in range(1,int(pages)): df = df.append(_dist_cotent(year, idx, retry_count, pause), ignore_index=True) return df elif top <= 25: df, pages = _dist_cotent(year, 0, retry_count, pause) return df.head(top) else: if isinstance(top, int): ct._write_head() allPages = top/25+1 if top%25>0 else top/25 df, pages = _dist_cotent(year, 0, retry_count, pause) if int(allPages) < int(pages): pages = allPages for idx in range(1, int(pages)): df = df.append(_dist_cotent(year, idx, retry_count, pause), ignore_index=True) return df.head(top) else: print(ct.TOP_PARAS_MSG) def _fun_divi(x): if ct.PY3: reg = re.compile(r'分红(.*?)元', re.UNICODE) res = reg.findall(x) return 0 if len(res)<1 else float(res[0]) else: if isinstance(x, unicode): s1 = unicode('分红','utf-8') s2 = unicode('元','utf-8') reg = re.compile(r'%s(.*?)%s'%(s1, s2), re.UNICODE) res = reg.findall(x) return 0 if len(res)<1 else float(res[0]) else: return 0 def _fun_into(x): if ct.PY3: reg1 = re.compile(r'转增(.*?)股', re.UNICODE) reg2 = re.compile(r'送股(.*?)股', re.UNICODE) res1 = reg1.findall(x) res2 = reg2.findall(x) res1 = 0 if len(res1)<1 else float(res1[0]) res2 = 0 if len(res2)<1 else float(res2[0]) return res1 + res2 else: if isinstance(x, unicode): s1 = unicode('转增','utf-8') s2 = unicode('送股','utf-8') s3 = unicode('股','utf-8') reg1 = re.compile(r'%s(.*?)%s'%(s1, s3), re.UNICODE) reg2 = re.compile(r'%s(.*?)%s'%(s2, s3), re.UNICODE) res1 = reg1.findall(x) res2 = reg2.findall(x) res1 = 0 if len(res1)<1 else float(res1[0]) res2 = 0 if len(res2)<1 else float(res2[0]) return res1 + res2 else: return 0 def _dist_cotent(year, pageNo, retry_count, pause): for _ in range(retry_count): time.sleep(pause) try: if pageNo > 0: ct._write_console() html = lxml.html.parse(rv.DP_163_URL%(ct.P_TYPE['http'], ct.DOMAINS['163'], ct.PAGES['163dp'], year, pageNo)) res = html.xpath('//div[@class=\"fn_rp_list\"]/table') if ct.PY3: sarr = [etree.tostring(node).decode('utf-8') for node in res] else: sarr = [etree.tostring(node) for node in res] sarr = ''.join(sarr) df = pd.read_html(sarr, skiprows=[0])[0] df = df.drop(df.columns[0], axis=1) df.columns = rv.DP_163_COLS df['divi'] = df['plan'].map(_fun_divi) df['shares'] = df['plan'].map(_fun_into) df = df.drop('plan', axis=1) df['code'] = df['code'].astype(object) df['code'] = df['code'].map(lambda x : str(x).zfill(6)) pages = [] if pageNo == 0: page = html.xpath('//div[@class=\"mod_pages\"]/a') if len(page)>1: asr = page[len(page)-2] pages = asr.xpath('text()') except Exception as e: print(e) else: if pageNo == 0: return df, pages[0] if len(pages)>0 else 0 else: return df raise IOError(ct.NETWORK_URL_ERROR_MSG) def profit_divis(): ''' 获取分送送股数据 ------- Return:DataFrame code:代码 name:证券简称 year:分配年度 bshares:送股 incshares:转增股 totals:送转总数 cash:派现 plandate:预案公布日 regdate:股权登记日 exdate:除权除息日 eventproc:事件进程 ,预案或实施 anndate:公告日期 ''' ct._write_head() p = 'cfidata.aspx?sortfd=&sortway=&curpage=1&fr=content&ndk=A0A1934A1939A1957A1966A1983&xztj=&mystock=' df = _profit_divis(1, pd.DataFrame(), p) df = df.drop([3], axis=1) df.columns = ct.PROFIT_DIVIS df['code'] = df['code'].map(lambda x: str(x).zfill(6)) return df def _profit_divis(pageNo, dataArr, nextPage): ct._write_console() html = lxml.html.parse('%sdata.cfi.cn/%s'%(ct.P_TYPE['http'], nextPage)) res = html.xpath("//table[@class=\"table_data\"]/tr") if ct.PY3: sarr = [etree.tostring(node).decode('utf-8') for node in res] else: sarr = [etree.tostring(node) for node in res] sarr = ''.join(sarr) sarr = sarr.replace('--', '0') sarr = '%s
'%sarr df = pd.read_html(sarr, skiprows=[0])[0] dataArr = dataArr.append(df, ignore_index=True) nextPage = html.xpath('//div[@id=\"content\"]/div[2]/a[last()]/@href')[0] np = nextPage.split('&')[2].split('=')[1] if pageNo < int(np): return _profit_divis(int(np), dataArr, nextPage) else: return dataArr def forecast_data(year, quarter): """ 获取业绩预告数据 Parameters -------- year:int 年度 e.g:2014 quarter:int 季度 :1、2、3、4,只能输入这4个季度 说明:由于是从网站获取的数据,需要一页页抓取,速度取决于您当前网络速度 Return -------- DataFrame code,代码 name,名称 type,业绩变动类型【预增、预亏等】 report_date,发布日期 pre_eps,上年同期每股收益 range,业绩变动范围 """ if ct._check_input(year, quarter) is True: ct._write_head() data = _get_forecast_data(year, quarter, 1, pd.DataFrame()) df = pd.DataFrame(data, columns=ct.FORECAST_COLS) df['code'] = df['code'].map(lambda x: str(x).zfill(6)) return df def _get_forecast_data(year, quarter, pageNo, dataArr): ct._write_console() try: gparser = etree.HTMLParser(encoding='GBK') html = lxml.html.parse(ct.FORECAST_URL%(ct.P_TYPE['http'], ct.DOMAINS['vsf'], ct.PAGES['fd'], year, quarter, pageNo, ct.PAGE_NUM[1]), parser=gparser) res = html.xpath("//table[@class=\"list_table\"]/tr") if ct.PY3: sarr = [etree.tostring(node).decode('utf-8') for node in res] else: sarr = [etree.tostring(node) for node in res] sarr = ''.join(sarr) sarr = sarr.replace('--', '0') sarr = '%s
'%sarr df = pd.read_html(sarr)[0] df = df.drop([4, 5, 8], axis=1) df.columns = ct.FORECAST_COLS dataArr = dataArr.append(df, ignore_index=True) nextPage = html.xpath('//div[@class=\"pages\"]/a[last()]/@onclick') if len(nextPage)>0: pageNo = re.findall(r'\d+',nextPage[0])[0] return _get_forecast_data(year, quarter, pageNo, dataArr) else: return dataArr except Exception as e: print(e) def xsg_data(year=None, month=None, retry_count=3, pause=0.001): """ 获取限售股解禁数据 Parameters -------- year:年份,默认为当前年 month:解禁月份,默认为当前月 retry_count : int, 默认 3 如遇网络等问题重复执行的次数 pause : int, 默认 0 重复请求数据过程中暂停的秒数,防止请求间隔时间太短出现的问题 Return ------ DataFrame code:股票代码 name:名称 date:解禁日期 count:解禁数量(万股) ratio:占总盘比率 """ year = du.get_year() if year is None else year month = du.get_month() if month is None else month for _ in range(retry_count): time.sleep(pause) try: request = Request(rv.XSG_URL%(ct.P_TYPE['http'], ct.DOMAINS['em'], ct.PAGES['emxsg'], year, month)) lines = urlopen(request, timeout = 10).read() lines = lines.decode('utf-8') if ct.PY3 else lines except Exception as e: print(e) else: da = lines[3:len(lines)-3] list = [] for row in da.split('","'): list.append([data for data in row.split(',')]) df = pd.DataFrame(list) df = df[[1, 3, 4, 5, 6]] for col in [5, 6]: df[col] = df[col].astype(float) df[5] = df[5]/10000 df[6] = df[6]*100 df[5] = df[5].map(ct.FORMAT) df[6] = df[6].map(ct.FORMAT) df.columns = rv.XSG_COLS return df raise IOError(ct.NETWORK_URL_ERROR_MSG) def fund_holdings(year, quarter, retry_count=3, pause=0.001): """ 获取基金持股数据 Parameters -------- year:年份e.g 2014 quarter:季度(只能输入1,2,3,4这个四个数字) retry_count : int, 默认 3 如遇网络等问题重复执行的次数 pause : int, 默认 0 重复请求数据过程中暂停的秒数,防止请求间隔时间太短出现的问题 Return ------ DataFrame code:股票代码 name:名称 date:报告日期 nums:基金家数 nlast:与上期相比(增加或减少了) count:基金持股数(万股) clast:与上期相比 amount:基金持股市值 ratio:占流通盘比率 """ start,end = rv.QUARTS_DIC[str(quarter)] if quarter == 1: start = start % str(year-1) end = end%year else: start, end = start%year, end%year ct._write_head() df, pages = _holding_cotent(start, end, 0, retry_count, pause) for idx in range(1, pages): df = df.append(_holding_cotent(start, end, idx, retry_count, pause), ignore_index=True) return df def _holding_cotent(start, end, pageNo, retry_count, pause): for _ in range(retry_count): time.sleep(pause) if pageNo>0: ct._write_console() try: request = Request(rv.FUND_HOLDS_URL%(ct.P_TYPE['http'], ct.DOMAINS['163'], ct.PAGES['163fh'], ct.PAGES['163fh'], pageNo, start, end, _random(5))) lines = urlopen(request, timeout = 10).read() lines = lines.decode('utf-8') if ct.PY3 else lines lines = lines.replace('--', '0') lines = json.loads(lines) data = lines['list'] df = pd.DataFrame(data) df = df.drop(['CODE', 'ESYMBOL', 'EXCHANGE', 'NAME', 'RN', 'SHANGQIGUSHU', 'SHANGQISHIZHI', 'SHANGQISHULIANG'], axis=1) for col in ['GUSHU', 'GUSHUBIJIAO', 'SHIZHI', 'SCSTC27']: df[col] = df[col].astype(float) df['SCSTC27'] = df['SCSTC27']*100 df['GUSHU'] = df['GUSHU']/10000 df['GUSHUBIJIAO'] = df['GUSHUBIJIAO']/10000 df['SHIZHI'] = df['SHIZHI']/10000 df['GUSHU'] = df['GUSHU'].map(ct.FORMAT) df['GUSHUBIJIAO'] = df['GUSHUBIJIAO'].map(ct.FORMAT) df['SHIZHI'] = df['SHIZHI'].map(ct.FORMAT) df['SCSTC27'] = df['SCSTC27'].map(ct.FORMAT) df.columns = rv.FUND_HOLDS_COLS df = df[['code', 'name', 'date', 'nums', 'nlast', 'count', 'clast', 'amount', 'ratio']] except Exception as e: print(e) else: if pageNo == 0: return df, int(lines['pagecount']) else: return df raise IOError(ct.NETWORK_URL_ERROR_MSG) def new_stocks(retry_count=3, pause=0.001): """ 获取新股上市数据 Parameters -------- retry_count : int, 默认 3 如遇网络等问题重复执行的次数 pause : int, 默认 0 重复请求数据过程中暂停的秒数,防止请求间隔时间太短出现的问题 Return ------ DataFrame code:股票代码 xcode:申购代码 name:名称 ipo_date:上网发行日期 issue_date:上市日期 amount:发行数量(万股) markets:上网发行数量(万股) price:发行价格(元) pe:发行市盈率 limit:个人申购上限(万股) funds:募集资金(亿元) ballot:网上中签率(%) """ data = pd.DataFrame() ct._write_head() df = _newstocks(data, 1, retry_count, pause) return df def _newstocks(data, pageNo, retry_count, pause): for _ in range(retry_count): time.sleep(pause) ct._write_console() try: request = Request(rv.NEW_STOCKS_URL%(ct.P_TYPE['http'],ct.DOMAINS['vsf'], ct.PAGES['newstock'], pageNo)) text = urlopen(request, timeout=10).read() text = text.decode('GBK') html = lxml.html.parse(StringIO(text)) res = html.xpath('//table[@id=\"NewStockTable\"]/tr') if len(res) == 0: return data if ct.PY3: sarr = [etree.tostring(node).decode('utf-8') for node in res] else: sarr = [etree.tostring(node) for node in res] sarr = ''.join(sarr) sarr = sarr.replace('*', '') sarr = '%s
'%sarr df = pd.read_html(StringIO(sarr), skiprows=[0, 1])[0] df = df.drop([df.columns[idx] for idx in [12, 13, 14, 15]], axis=1) df.columns = rv.NEW_STOCKS_COLS df['code'] = df['code'].map(lambda x : str(x).zfill(6)) df['xcode'] = df['xcode'].map(lambda x : str(x).zfill(6)) res = html.xpath('//table[@class=\"table2\"]/tr[1]/td[1]/a/text()') tag = '下一页' if ct.PY3 else unicode('下一页', 'utf-8') hasNext = True if tag in res else False data = data.append(df, ignore_index=True) pageNo += 1 if hasNext: data = _newstocks(data, pageNo, retry_count, pause) except Exception as ex: print(ex) else: return data def new_cbonds(default=1, retry_count=3, pause=0.001): """ 获取可转债申购列表 Parameters -------- retry_count : int, 默认 3 如遇网络等问题重复执行的次数 pause : int, 默认 0 重复请求数据过程中暂停的秒数,防止请求间隔时间太短出现的问题 Return ------ DataFrame bcode:债券代码 bname:债券名称 scode:股票代码 sname:股票名称 xcode:申购代码 amount:发行总数(亿元) marketprice:最新市场价格 convprice:转股价格 firstdayprice:首日收盘价 ipo_date:上网发行日期 issue_date:上市日期 ballot:中签率(%) return:打新收益率(%) perreturn:每中一股收益(万元) """ data = pd.DataFrame() if default == 1: data = _newcbonds(1, retry_count, pause) else: for page in range(1, 50): df = _newcbonds(page, retry_count, pause) if df is not None: data = data.append(df, ignore_index=True) else: break return data def _newcbonds(pageNo, retry_count, pause): for _ in range(retry_count): time.sleep(pause) if pageNo != 1: ct._write_console() try: html = lxml.html.parse(rv.NEW_CBONDS_URL%(ct.P_TYPE['http'],ct.DOMAINS['sstar'], pageNo)) res = html.xpath('//table/tr') if len(res) == 0: return None if ct.PY3: sarr = [etree.tostring(node).decode('utf-8') for node in res] else: sarr = [etree.tostring(node) for node in res] sarr = ''.join(sarr) sarr = '%s
'%sarr df = pd.read_html(StringIO(sarr), skiprows=[0]) if len(df) < 1: return None df = df[0] df = df.drop([df.columns[14], df.columns[15]], axis=1) df.columns = rv.NEW_CBONDS_COLS df['scode'] = df['scode'].map(lambda x: str(x).zfill(6)) df['xcode'] = df['xcode'].map(lambda x: str(x).zfill(6)) except Exception as ex: print(ex) else: return df def sh_margins(start=None, end=None, retry_count=3, pause=0.001): """ 获取沪市融资融券数据列表 Parameters -------- start:string 开始日期 format:YYYY-MM-DD 为空时取去年今日 end:string 结束日期 format:YYYY-MM-DD 为空时取当前日期 retry_count : int, 默认 3 如遇网络等问题重复执行的次数 pause : int, 默认 0 重复请求数据过程中暂停的秒数,防止请求间隔时间太短出现的问题 Return ------ DataFrame opDate:信用交易日期 rzye:本日融资余额(元) rzmre: 本日融资买入额(元) rqyl: 本日融券余量 rqylje: 本日融券余量金额(元) rqmcl: 本日融券卖出量 rzrqjyzl:本日融资融券余额(元) """ start = du.today_last_year() if start is None else start end = du.today() if end is None else end if du.diff_day(start, end) < 0: return None start, end = start.replace('-', ''), end.replace('-', '') data = pd.DataFrame() ct._write_head() df = _sh_hz(data, start=start, end=end, retry_count=retry_count, pause=pause) return df def _sh_hz(data, start=None, end=None, pageNo='', beginPage='', endPage='', retry_count=3, pause=0.001): for _ in range(retry_count): time.sleep(pause) ct._write_console() try: tail = rv.MAR_SH_HZ_TAIL_URL%(pageNo, beginPage, endPage) if pageNo == '': pageNo = 6 tail = '' else: pageNo += 5 beginPage = pageNo endPage = pageNo + 4 url = rv.MAR_SH_HZ_URL%(ct.P_TYPE['http'], ct.DOMAINS['sseq'], ct.PAGES['qmd'], _random(5), start, end, tail, _random()) ref = rv.MAR_SH_HZ_REF_URL%(ct.P_TYPE['http'], ct.DOMAINS['sse']) clt = Client(url, ref=ref, cookie=rv.MAR_SH_COOKIESTR) lines = clt.gvalue() lines = lines.decode('utf-8') if ct.PY3 else lines lines = lines[19:-1] lines = json.loads(lines) pagecount = int(lines['pageHelp'].get('pageCount')) datapage = int(pagecount/5+1 if pagecount%5>0 else pagecount/5) df = pd.DataFrame(lines['result'], columns=rv.MAR_SH_HZ_COLS) df['opDate'] = df['opDate'].map(lambda x: '%s-%s-%s'%(x[0:4], x[4:6], x[6:8])) data = data.append(df, ignore_index=True) if beginPage < datapage*5: data = _sh_hz(data, start=start, end=end, pageNo=pageNo, beginPage=beginPage, endPage=endPage, retry_count=retry_count, pause=pause) except Exception as e: print(e) else: return data raise IOError(ct.NETWORK_URL_ERROR_MSG) def sh_margin_details(date='', symbol='', start='', end='', retry_count=3, pause=0.001): """ 获取沪市融资融券明细列表 Parameters -------- date:string 明细数据日期 format:YYYY-MM-DD 默认为空'' symbol:string 标的代码,6位数字e.g.600848,默认为空 start:string 开始日期 format:YYYY-MM-DD 默认为空'' end:string 结束日期 format:YYYY-MM-DD 默认为空'' retry_count : int, 默认 3 如遇网络等问题重复执行的次数 pause : int, 默认 0 重复请求数据过程中暂停的秒数,防止请求间隔时间太短出现的问题 Return ------ DataFrame opDate:信用交易日期 stockCode:标的证券代码 securityAbbr:标的证券简称 rzye:本日融资余额(元) rzmre: 本日融资买入额(元) rzche:本日融资偿还额(元) rqyl: 本日融券余量 rqmcl: 本日融券卖出量 rqchl: 本日融券偿还量 """ date = date if date == '' else date.replace('-', '') start = start if start == '' else start.replace('-', '') end = end if end == '' else end.replace('-', '') if (start != '') & (end != ''): date = '' data = pd.DataFrame() ct._write_head() df = _sh_mx(data, date=date, start=start, end=end, symbol=symbol, retry_count=retry_count, pause=pause) return df def _sh_mx(data, date='', start='', end='', symbol='', pageNo='', beginPage='', endPage='', retry_count=3, pause=0.001): for _ in range(retry_count): time.sleep(pause) ct._write_console() try: tail = '&pageHelp.pageNo=%s&pageHelp.beginPage=%s&pageHelp.endPage=%s'%(pageNo, beginPage, endPage) if pageNo == '': pageNo = 6 tail = '' else: pageNo += 5 beginPage = pageNo endPage = pageNo + 4 ref = rv.MAR_SH_HZ_REF_URL%(ct.P_TYPE['http'], ct.DOMAINS['sse']) clt = Client(rv.MAR_SH_MX_URL%(ct.P_TYPE['http'], ct.DOMAINS['sseq'], ct.PAGES['qmd'], _random(5), date, symbol, start, end, tail, _random()), ref=ref, cookie=rv.MAR_SH_COOKIESTR) lines = clt.gvalue() lines = lines.decode('utf-8') if ct.PY3 else lines lines = lines[19:-1] lines = json.loads(lines) pagecount = int(lines['pageHelp'].get('pageCount')) datapage = int(pagecount/5+1 if pagecount%5>0 else pagecount/5) if pagecount == 0: return data if pageNo == 6: ct._write_tips(lines['pageHelp'].get('total')) df = pd.DataFrame(lines['result'], columns=rv.MAR_SH_MX_COLS) df['opDate'] = df['opDate'].map(lambda x: '%s-%s-%s'%(x[0:4], x[4:6], x[6:8])) data = data.append(df, ignore_index=True) if beginPage < datapage*5: data = _sh_mx(data, start=start, end=end, pageNo=pageNo, beginPage=beginPage, endPage=endPage, retry_count=retry_count, pause=pause) except Exception as e: print(e) else: return data raise IOError(ct.NETWORK_URL_ERROR_MSG) def sz_margins(start=None, end=None, retry_count=3, pause=0.001): """ 获取深市融资融券数据列表 Parameters -------- start:string 开始日期 format:YYYY-MM-DD 默认为上一周的今天 end:string 结束日期 format:YYYY-MM-DD 默认为今日 retry_count : int, 默认 3 如遇网络等问题重复执行的次数 pause : int, 默认 0 重复请求数据过程中暂停的秒数,防止请求间隔时间太短出现的问题 Return ------ DataFrame opDate:信用交易日期(index) rzmre: 融资买入额(元) rzye:融资余额(元) rqmcl: 融券卖出量 rqyl: 融券余量 rqye: 融券余量(元) rzrqye:融资融券余额(元) """ data = pd.DataFrame() if start is None and end is None: end = du.today() start = du.day_last_week() if start is None or end is None: ct._write_msg(rv.MAR_SZ_HZ_MSG2) return None try: date_range = pd.date_range(start=start, end=end, freq='B') if len(date_range)>261: ct._write_msg(rv.MAR_SZ_HZ_MSG) else: ct._write_head() for date in date_range: data = data.append(_sz_hz(str(date.date()), retry_count, pause) ) except: ct._write_msg(ct.DATA_INPUT_ERROR_MSG) else: return data def _sz_hz(date='', retry_count=3, pause=0.001): for _ in range(retry_count): time.sleep(pause) ct._write_console() try: request = Request(rv.MAR_SZ_HZ_URL%(ct.P_TYPE['http'], ct.DOMAINS['szse'], ct.PAGES['szsefc'], date)) lines = urlopen(request, timeout = 10).read() if len(lines) <= 200: return pd.DataFrame() df = pd.read_html(lines, skiprows=[0])[0] df.columns = rv.MAR_SZ_HZ_COLS df['opDate'] = date except Exception as e: print(e) else: return df raise IOError(ct.NETWORK_URL_ERROR_MSG) def sz_margin_details(date='', retry_count=3, pause=0.001): """ 获取深市融资融券明细列表 Parameters -------- date:string 明细数据日期 format:YYYY-MM-DD 默认为空'' retry_count : int, 默认 3 如遇网络等问题重复执行的次数 pause : int, 默认 0 重复请求数据过程中暂停的秒数,防止请求间隔时间太短出现的问题 Return ------ DataFrame opDate:信用交易日期 stockCode:标的证券代码 securityAbbr:标的证券简称 rzmre: 融资买入额(元) rzye:融资余额(元) rqmcl: 融券卖出量 rqyl: 融券余量 rqye: 融券余量(元) rzrqye:融资融券余额(元) """ for _ in range(retry_count): time.sleep(pause) try: request = Request(rv.MAR_SZ_MX_URL%(ct.P_TYPE['http'], ct.DOMAINS['szse'], ct.PAGES['szsefc'], date)) lines = urlopen(request, timeout = 10).read() if len(lines) <= 200: return pd.DataFrame() df = pd.read_html(lines, skiprows=[0])[0] df.columns = rv.MAR_SZ_MX_COLS df['stockCode'] = df['stockCode'].map(lambda x:str(x).zfill(6)) df['opDate'] = date except Exception as e: print(e) else: return df raise IOError(ct.NETWORK_URL_ERROR_MSG) def top10_holders(code=None, year=None, quarter=None, gdtype='0', retry_count=3, pause=0.001): if code is None: return None else: code = ct._code_to_symbol(code) gdtype = 'LT' if gdtype == '1' else '' qdate = '' if (year is not None) & (quarter is not None): qdate = du.get_q_date(year, quarter) for _ in range(retry_count): time.sleep(pause) try: request = Request(rv.TOP10_HOLDERS_URL%(ct.P_TYPE['http'], ct.DOMAINS['gw'], gdtype, code.upper())) lines = urlopen(request, timeout = 10).read() lines = lines.decode('utf8') if ct.PY3 else lines reg = re.compile(r'= \'\[(.*?)\]\';') lines = reg.findall(lines)[0] jss = json.loads('[%s]' %lines) summ = [] data = pd.DataFrame() for row in jss: qt = row['jzrq'] if 'jzrq' in row.keys() else None hold = row['ljcy'] if 'ljcy' in row.keys() else None change = row['ljbh'] if 'ljbh' in row.keys() else None props = row['ljzb'] if 'ljzb' in row.keys() else None arow = [qt, hold, change ,props] summ.append(arow) ls = row['sdgdList'] if 'sdgdList' in row.keys() else None dlist = [] for inrow in ls: sharetype = inrow['gbxz'] name = inrow['gdmc'] hold = inrow['cgs'] h_pro = inrow['zzgs'] status = inrow['zjqk'] dlist.append([qt, name, hold, h_pro, sharetype, status]) ddata = pd.DataFrame(dlist, columns=rv.TOP10_PER_COLS) data = data.append(ddata, ignore_index=True) df = pd.DataFrame(summ, columns=rv.TOP10_SUMM_COLS) if qdate != '': df = df[df.quarter == qdate] data = data[data.quarter == qdate] except Exception as e: print(e) else: return df, data raise IOError(ct.NETWORK_URL_ERROR_MSG) def moneyflow_hsgt(): """ 获取沪深港通资金流向 return: DataFrame,单位: 百万元 -------------- date: 交易日期 ggt_ss: 港股通(沪) ggt_sz: 港股通(深) hgt: 沪港通 sgt: 深港通 north_money: 北向资金流入 south_money: 南向资金流入 """ clt = Client(rv.HSGT_DATA%(ct.P_TYPE['http'], ct.DOMAINS['em']), ref=rv.HSGT_REF%(ct.P_TYPE['http'], ct.DOMAINS['em'], ct.PAGES['index'])) content = clt.gvalue() content = content.decode('utf-8') if ct.PY3 else content js = json.loads(content) df = pd.DataFrame(js) df['DateTime'] = df['DateTime'].map(lambda x: x[0:10]) df = df.replace('-', np.NaN) df = df[rv.HSGT_TEMP] df.columns = rv.HSGT_COLS df = df.sort_values('date', ascending=False) return df def margin_detail(date=''): """ 沪深融券融券明细 Parameters --------------- date:string 日期 format:YYYY-MM-DD 或者 YYYYMMDD return DataFrame -------------- code: 证券代码 name: 证券名称 buy: 今日买入额 buy_total:融资余额 sell: 今日卖出量(股) sell_total: 融券余量(股) sell_amount: 融券余额 total: 融资融券余额(元) buy_repay: 本日融资偿还额(元) sell_repay: 本日融券偿还量 """ date = str(date).replace('-', '') df = pd.read_csv(ct.MG_URL%(ct.P_TYPE['http'], ct.DOMAINS['oss'], date[0:6], 'mx', date), dtype={'code': object}) return df def margin_target(date=''): """ 沪深融券融券标的 Parameters --------------- date:string 日期 format:YYYY-MM-DD 或者 YYYYMMDD return DataFrame -------------- code: 证券代码 name: 证券名称 long: 融资标的 short: 融券标的 """ date = str(date).replace('-', '') df = pd.read_csv(ct.MG_URL%(ct.P_TYPE['http'], ct.DOMAINS['oss'], date[0:6], 'bd', date), dtype={'code': object}) return df def margin_offset(date): """ 融资融券可充抵保证金证券 Parameters --------------- date:string 日期 format:YYYY-MM-DD 或者 YYYYMMDD return DataFrame -------------- code: 证券代码 name: 证券名称 """ date = str(date).replace('-', '') df = pd.read_csv(ct.MG_URL%(ct.P_TYPE['http'], ct.DOMAINS['oss'], date[0:6], 'cd', date), dtype={'code': object}) return df def stock_pledged(): """ 股票质押数据 return DataFrame -------------- code: 证券代码 name: 证券名称 deals: 质押次数 unrest_pledged: 无限售股质押数量(万) rest_pledged: 限售股质押数量(万) totals: 总股本 p_ratio:质押比例(%) """ df = pd.read_csv(ct.GPZY_URL%(ct.P_TYPE['http'], ct.DOMAINS['oss'], 'gpzy'), dtype={'code': object}) return df def pledged_detail(): """ 股票质押数据 return DataFrame -------------- code: 证券代码 name: 证券名称 ann_date: 公告日期 pledgor:出质人 pledgee:质权人 volume:质押数量 from_date:质押日期 end_date: 解除日期 """ df = pd.read_csv(ct.GPZY_D_URL%(ct.P_TYPE['http'], ct.DOMAINS['oss'], 'gpzy_detail'), dtype={'code': object, 'ann_date': object, 'end_date': object}) df['code'] = df['code'].map(lambda x : str(x).zfill(6)) df['end_date'] = np.where(df['end_date'] == '--', np.NaN, df['end_date']) return df def margin_zsl(date='', broker=''): """ 融资融券充抵保证金折算率 Parameters --------------- date:string 日期 format:YYYY-MM-DD 或者 YYYYMMDD broker: gtja:国泰君安 yhzq:银河证券 gfzq:广发证券 zszq:招商证券 gxzq:国信证券 swhy:申万宏源 zxjt:中信建投 zxzq:中信证券 return DataFrame -------------- code: 证券代码 name: 证券名称 ratio:比率 broker:券商代码 """ date = str(date).replace('-', '') df = pd.read_csv(ct.MG_ZSL_URL%(ct.P_TYPE['http'], ct.DOMAINS['oss'], date[0:6], broker, date), dtype={'code': object}) return df def stock_issuance(start_date='', end_date=''): """ 股票增发 Parameters --------------- start_date:string end_date:string 日期 format:YYYY-MM-DD return DataFrame -------------- code: 证券代码 name: 证券名称 type:类型,定向增发/公开增发 count:数量 price:增发价格 close:最近收盘价 issue_date:增发日期 list_date:上市日期 locked_year:锁定年数 prem:截止当前溢价(%) """ df = pd.read_csv(ct.ZF%(ct.P_TYPE['http'], ct.DOMAINS['oss'], 'zf'), dtype={'code': object}) if start_date != '' and start_date is not None: df = df[df.issue_date >= start_date] if end_date != '' and end_date is not None: df = df[df.issue_date <= start_date] df['prem'] = (df['close'] - df['price']) / df['price'] * 100 df['prem'] = df['prem'].map(ct.FORMAT) df['prem'] = df['prem'].astype(float) return df def _random(n=13): from random import randint start = 10**(n-1) end = (10**n)-1 return str(randint(start, end)) if __name__ == '__main__': df = new_stocks() print(df)