87 lines
3.5 KiB
Python
87 lines
3.5 KiB
Python
#!/usr/bin/env python
|
|
# -*- coding:utf-8 -*-
|
|
"""
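Top-list ("dragon-tiger board") data.
NOTE(review): this title looks inherited from another module; the only
function defined in this file is bdi(). Confirm and retitle.

Created on 2017-08-13
@author: Jimmy Liu
@group : waditu
@contact: jimmysoa@sina.cn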
"""
|
|
|
|
import pandas as pd
|
|
v = pd.__version__
|
|
if int(v.split('.')[1])>=25 or int(v.split('.')[0])>0:
|
|
from io import StringIO
|
|
else:
|
|
from pandas.compat import StringIO
|
|
from tushare.stock import cons as ct
|
|
import time
|
|
import re
|
|
import lxml.html
|
|
from lxml import etree
|
|
try:
|
|
from urllib.request import urlopen, Request
|
|
except ImportError:
|
|
from urllib2 import urlopen, Request
|
|
|
|
def _parse_bdi_daily(linestr):
    """Build the daily frame from the chart XML embedded in the page text.

    The page carries the series as an XML-like string passed to a
    ``"chart_data"`` call. The tags are renamed into ``<table>/<tr>/<td>``
    so that ``pd.read_html`` can parse them.

    Raises:
        IndexError: if the ``chart_data`` payload is not found in *linestr*.
    """
    chart_xml = re.findall(r'\"chart_data\",\"(.*?)\"\);', linestr)[0]
    # Order matters: the wrapper tags are stripped before the generic
    # 'series'/'graph' renames, otherwise they would be renamed too.
    table_html = (chart_xml.replace('chart', 'table')
                  .replace('</series><graphs>', '')
                  .replace('</graphs>', '')
                  .replace('series', 'tr')
                  .replace('value', 'td')
                  .replace('graph', 'tr'))
    # Wrap in StringIO: passing literal HTML to read_html is deprecated.
    df = pd.read_html(StringIO(table_html), encoding='utf8')[0]
    # The parsed table has one row of dates and one row of values.
    df = df.T
    df.columns = ['date', 'index']
    df['date'] = df['date'].map(
        lambda x: x.replace(u'年', '-').replace(u'月', '-').replace(u'日', ''))
    df['date'] = pd.to_datetime(df['date'])
    df['index'] = df['index'].astype(float)
    df = df.sort_values('date', ascending=False).reset_index(drop=True)
    # Rows are newest-first, so pct_change(-1) compares each row with the
    # following (older) one. Expressed in percent, two decimals.
    change = df['index'].pct_change(-1) * 100
    df['change'] = change.map(lambda x: '%.2f' % x).astype(float)
    return df


def _parse_bdi_table(linestr):
    """Build the ``month``/``index``/``change`` frame from the HTML tables."""
    html = lxml.html.parse(StringIO(linestr))
    nodes = html.xpath("//table[@class=\"style33\"]/tr/td/table[last()]")
    if ct.PY3:
        chunks = [etree.tostring(node).decode('utf-8') for node in nodes]
    else:
        chunks = [etree.tostring(node) for node in nodes]
    table_html = '<table>%s</table>' % ''.join(chunks)
    # Drop the first row; copy so the column assignments below do not write
    # into a slice of the parsed frame (SettingWithCopyWarning).
    df = pd.read_html(StringIO(table_html))[0][1:].copy()
    df.columns = ['month', 'index']
    df['month'] = df['month'].map(
        lambda x: x.replace(u'年', '-').replace(u'月', ''))
    df['month'] = pd.to_datetime(df['month'])
    # Timestamp -> 'YYYYMM' string.
    df['month'] = df['month'].map(lambda x: str(x).replace('-', '')[:6])
    df['index'] = df['index'].astype(float)
    # NOTE(review): unlike the daily branch, this ratio is NOT multiplied by
    # 100 before rounding. Kept as-is for backward compatibility -- confirm
    # whether a percentage was intended.
    df['change'] = df['index'].pct_change(-1)
    df['change'] = df['change'].map(lambda x: '%.2f' % x).astype(float)
    return df


def bdi(itype='D', retry_count=3,
        pause=0.001):
    """Fetch BDI index data from ``ct.BDI_URL`` and return it as a DataFrame.

    Parameters
    ----------
    itype : str
        ``'D'`` selects the daily series. Any other value selects the
        table-based series (commented "Weekly" in the original code, although
        its time column is named ``month``).
    retry_count : int
        Maximum number of download attempts.
    pause : float
        Seconds to sleep before each attempt.

    Returns
    -------
    pandas.DataFrame or None
        For ``itype == 'D'``: columns ``date``, ``index``, ``change``, sorted
        by date descending. Otherwise: columns ``month``, ``index``,
        ``change``. ``None`` when the response is shorter than 100 bytes or
        when every attempt fails (each failure is printed).

    Raises
    ------
    IndexError
        In daily mode, if the expected ``chart_data`` payload is missing.
    """
    for _ in range(retry_count):
        time.sleep(pause)
        try:
            request = Request(ct.BDI_URL % (ct.P_TYPE['http'],
                                            ct.DOMAINS['v500']))
            response = urlopen(request, timeout=10)
            # try/finally rather than `with`: Python 2 urllib2 responses are
            # not context managers.
            try:
                lines = response.read()
            finally:
                response.close()
        except Exception as e:
            # Best-effort download: report the error and retry.
            print(e)
            continue
        if len(lines) < 100:  # no data
            return None
        linestr = lines.decode('utf-8') if ct.PY3 else lines
        if itype == 'D':  # Daily
            return _parse_bdi_daily(linestr)
        return _parse_bdi_table(linestr)
    return None
|
|
|