更新数据:新增停牌判断,用于区分数据是否为最新;新增按当前时间决定是否以当天日期作为最新日期的逻辑。
This commit is contained in:
BIN
__pycache__/check_market_data.cpython-310.pyc
Normal file
BIN
__pycache__/check_market_data.cpython-310.pyc
Normal file
Binary file not shown.
@@ -236,6 +236,209 @@ def check_online_data_exists(ts_code, trade_date):
|
||||
logger.error(f"查询在线数据失败 {ts_code} {trade_date}: {str(e)}")
|
||||
return None
|
||||
|
||||
def get_suspend_info(ts_code, start_date, end_date):
    """
    Query the Tushare ``suspend_d`` endpoint for one stock over a date window.

    :param ts_code: stock code, e.g. '688800.SH'
    :param start_date: first day of the window, formatted YYYYMMDD
    :param end_date: last day of the window, formatted YYYYMMDD
    :return: the DataFrame returned by the API; None when the result is
             missing/empty or when the query raised
    """
    try:
        api = tushare_manager.get_pro_api()
        # Throttle before touching the remote endpoint.
        tushare_manager.control_request_rate()

        records = api.suspend_d(ts_code=ts_code, start_date=start_date, end_date=end_date)

        has_rows = records is not None and not records.empty
        if has_rows:
            logger.info(f"找到 {ts_code} 在 {start_date} 至 {end_date} 期间的停牌数据: {len(records)}条")
            return records

        logger.info(f"未找到 {ts_code} 在 {start_date} 至 {end_date} 期间的停牌数据")
        return None
    except Exception as err:
        # Best effort: callers treat None as "no suspension data available".
        logger.error(f"查询停牌信息失败 {ts_code} {start_date}-{end_date}: {str(err)}")
        return None
|
||||
|
||||
def _clean_suspend_date(value):
    """Return *value* as a YYYYMMDD string, or None when the cell is missing.

    "Missing" covers None, NaN/NaT (what pandas stores for an absent cell),
    an empty string and the '00000000' placeholder.
    """
    if value is None or pd.isna(value):
        return None
    if isinstance(value, float):
        # A numeric column containing NaN is read as float (20251216.0).
        value = int(value)
    text = str(value).strip()
    if not text or text == '00000000':
        return None
    return text


def check_stock_suspended(ts_code, check_date):
    """
    Check whether a stock is suspended on a given date.

    Suspension records are fetched through ``get_suspend_info`` for a window
    of 30 days on either side of ``check_date``. Two row layouts are handled:
    per-day rows (``trade_date`` plus ``suspend_type``) and range rows
    (``suspend_date`` plus ``resume_date``).

    :param ts_code: stock code, e.g. '688800.SH'
    :param check_date: date to check, formatted YYYYMMDD
    :return: (is_suspended, suspend_dates, latest_suspend_start)
             - is_suspended: True/False, or None when the check itself failed
             - suspend_dates: suspended days/ranges joined with ", "
               ("" when there are none)
             - latest_suspend_start: the greatest suspension date seen in the
               window, or None when no suspension row was found
    """
    try:
        # Widen the query by one month each way so the full suspension
        # history around check_date is captured.
        check_dt = datetime.datetime.strptime(check_date, '%Y%m%d')
        start_date = (check_dt - datetime.timedelta(days=30)).strftime('%Y%m%d')
        end_date = (check_dt + datetime.timedelta(days=30)).strftime('%Y%m%d')

        suspend_df = get_suspend_info(ts_code, start_date, end_date)

        if suspend_df is None:
            # No suspension data found: assume the stock is trading.
            return False, "", None

        suspend_dates_list = []
        is_suspended = False
        latest_suspend_start = None

        for _, row in suspend_df.iterrows():
            has_trade_date = 'trade_date' in row

            # Per-day layout: one row per (trade_date, suspend_type).
            if has_trade_date and 'suspend_type' in row:
                trade_date = _clean_suspend_date(row['trade_date'])

                # Only 'S' rows are suspension days. Other types (e.g. the
                # resumption marker) must not move latest_suspend_start,
                # otherwise it would point at a day the stock was trading.
                if trade_date is None or row['suspend_type'] != 'S':
                    continue

                suspend_dates_list.append(trade_date)
                if latest_suspend_start is None or trade_date > latest_suspend_start:
                    latest_suspend_start = trade_date

                if trade_date == check_date:
                    logger.info(f"股票 {ts_code} 在 {check_date} 处于停牌状态")
                    is_suspended = True
                continue

            # Neither layout is complete: accept a bare (ts_code, trade_date)
            # row as a single suspended day, otherwise report and skip it.
            if 'suspend_date' not in row or 'resume_date' not in row:
                trade_date = None
                if 'ts_code' in row and has_trade_date:
                    trade_date = _clean_suspend_date(row['trade_date'])

                if trade_date is None:
                    logger.warning(f"停牌数据缺少必要字段: {row}")
                    continue

                logger.info(f"股票 {ts_code} 在 {trade_date} 处于停牌状态")
                if trade_date == check_date:
                    is_suspended = True
                suspend_dates_list.append(trade_date)
                # Keep the maximum rather than whichever row came last.
                if latest_suspend_start is None or trade_date > latest_suspend_start:
                    latest_suspend_start = trade_date
                continue

            # Range layout: suspend_date .. resume_date.
            suspend_start = _clean_suspend_date(row['suspend_date'])
            if suspend_start is None:
                logger.warning(f"停牌数据缺少必要字段: {row}")
                continue

            # A missing resume date (None, NaN, '' or '00000000') means the
            # stock has not resumed yet; cap the range at the query end.
            suspend_end = _clean_suspend_date(row['resume_date']) or end_date

            if latest_suspend_start is None or suspend_start > latest_suspend_start:
                latest_suspend_start = suspend_start

            suspend_dates_list.append(f"{suspend_start}-{suspend_end}")

            # NOTE(review): the range is treated as inclusive of resume_date,
            # as in the original code - confirm against the data source
            # whether the stock already trades on that day.
            if suspend_start <= check_date <= suspend_end:
                logger.info(f"股票 {ts_code} 在 {check_date} 处于停牌状态({suspend_start}至{suspend_end})")
                is_suspended = True

        suspend_dates = ", ".join(suspend_dates_list)

        if not is_suspended:
            logger.info(f"股票 {ts_code} 在 {check_date} 未处于停牌状态")

        return is_suspended, suspend_dates, latest_suspend_start
    except Exception as e:
        # None (not False) tells the caller the check itself failed.
        logger.error(f"检查股票停牌状态失败 {ts_code} {check_date}: {str(e)}")
        return None, "", None
|
||||
|
||||
def update_stock_data(ts_code):
    """
    Bring the local daily-quote file of one stock up to date.

    When ``<DATA_DIR>/<ts_code>_daily_data.txt`` exists and is readable, only
    the rows after its newest ``trade_date`` are requested and merged in.
    Otherwise (file missing, unreadable, empty, or without a ``trade_date``
    column) the full history is downloaded and the file is rewritten.

    :param ts_code: stock code, e.g. '688800.SH'
    :return: True when the file is up to date afterwards (including the case
             where no new rows exist); False when nothing could be fetched
             or an error occurred
    """
    try:
        logger.info(f"开始更新股票 {ts_code} 的行情数据")

        output_file = Config.DATA_DIR / f"{ts_code}_daily_data.txt"
        pro = tushare_manager.get_pro_api()

        if output_file.exists():
            try:
                # Read trade_date as text. Left to type inference it becomes
                # int64, while the API returns strings; the merged column
                # would then mix ints and strings, which breaks both the
                # de-duplication and the sort below.
                df = pd.read_csv(
                    output_file,
                    sep='\t',
                    encoding='utf-8',
                    dtype={'trade_date': str},
                )

                if not df.empty and 'trade_date' in df.columns:
                    latest_date = df['trade_date'].dropna().max()

                    # Start from the following calendar day so the newest
                    # stored day is not fetched again.
                    latest_dt = datetime.datetime.strptime(str(latest_date), '%Y%m%d')
                    next_dt = latest_dt + datetime.timedelta(days=1)
                    next_date = next_dt.strftime('%Y%m%d')

                    logger.info(f"股票 {ts_code} 现有最新日期: {latest_date},将获取 {next_date} 至今的数据")

                    tushare_manager.control_request_rate()
                    new_df = pro.daily(ts_code=ts_code, start_date=next_date)

                    if new_df is not None and not new_df.empty:
                        logger.info(f"获取到 {ts_code} 的新数据 {len(new_df)} 条")

                        # Normalise the key column to the same text type as
                        # the rows loaded from disk.
                        new_df = new_df.assign(trade_date=new_df['trade_date'].astype(str))

                        combined_df = pd.concat([df, new_df], ignore_index=True)
                        # On a duplicated day keep the freshly fetched row.
                        combined_df = combined_df.drop_duplicates(subset=['trade_date', 'ts_code'], keep='last')
                        # Newest trading day first.
                        combined_df = combined_df.sort_values('trade_date', ascending=False)

                        combined_df.to_csv(output_file, index=False, sep='\t', encoding='utf-8')
                        logger.info(f"股票 {ts_code} 的行情数据已成功更新")
                        return True
                    else:
                        logger.info(f"未获取到股票 {ts_code} 的新数据")
                        return True
                else:
                    logger.warning(f"文件 {output_file} 内容异常,重新获取全部数据")
            except Exception as e:
                # Deliberate best effort: fall through to a full download.
                logger.error(f"读取文件 {output_file} 失败: {str(e)}")

        # File missing, unreadable or malformed: fetch the whole history.
        logger.info(f"获取股票 {ts_code} 的全部行情数据")

        tushare_manager.control_request_rate()
        full_df = pro.daily(ts_code=ts_code)

        if full_df is not None and not full_df.empty:
            # Newest trading day first.
            full_df = full_df.sort_values('trade_date', ascending=False)

            full_df.to_csv(output_file, index=False, sep='\t', encoding='utf-8')
            logger.info(f"股票 {ts_code} 的行情数据已成功获取并保存")
            return True
        else:
            logger.warning(f"未能获取到股票 {ts_code} 的行情数据")
            return False
    except Exception as e:
        logger.error(f"更新股票 {ts_code} 数据失败: {str(e)}")
        return False
|
||||
|
||||
def check_market_data(online_check=Config.DEFAULT_ONLINE_CHECK):
|
||||
"""
|
||||
检查所有行情数据文件的完整性
|
||||
@@ -246,9 +449,19 @@ def check_market_data(online_check=Config.DEFAULT_ONLINE_CHECK):
|
||||
# 设置数据目录
|
||||
data_dir = Config.DATA_DIR
|
||||
|
||||
# 获取当天日期(格式:YYYYMMDD)
|
||||
today = datetime.datetime.now().strftime('%Y%m%d')
|
||||
logger.info(f"开始检查行情数据完整性,当前日期:{today}")
|
||||
# 获取当前时间
|
||||
now = datetime.datetime.now()
|
||||
today = now.strftime('%Y%m%d')
|
||||
|
||||
# 添加时间判断逻辑:如果当前时间早于16:00,检查日期为前一天;否则为当天
|
||||
if now.hour < 16:
|
||||
# 获取前一天日期
|
||||
yesterday = now - datetime.timedelta(days=1)
|
||||
check_date = yesterday.strftime('%Y%m%d')
|
||||
logger.info(f"当前时间{now.strftime('%Y-%m-%d %H:%M:%S')}早于16:00,检查日期调整为前一天:{check_date}")
|
||||
else:
|
||||
check_date = today
|
||||
logger.info(f"开始检查行情数据完整性,检查日期:{check_date}")
|
||||
|
||||
# 获取所有txt文件列表
|
||||
all_files = list(data_dir.glob('*.txt'))
|
||||
@@ -278,21 +491,24 @@ def check_market_data(online_check=Config.DEFAULT_ONLINE_CHECK):
|
||||
'latest_date': 'N/A',
|
||||
'trading_days_diff': 'N/A',
|
||||
'online_data_exists': 'N/A',
|
||||
'status': '文件内容异常'
|
||||
'status': '文件内容异常',
|
||||
'是否在停牌状态': 'N/A',
|
||||
'停牌的日期': 'N/A'
|
||||
})
|
||||
elif latest_date != today:
|
||||
elif latest_date != check_date:
|
||||
# 计算交易日差
|
||||
trading_days_diff = calculate_trading_days_diff(latest_date, today)
|
||||
trading_days_diff = calculate_trading_days_diff(latest_date, check_date)
|
||||
|
||||
# 检查在线数据是否存在
|
||||
online_data_exists = None
|
||||
if online_check:
|
||||
online_data_exists = check_online_data_exists(ts_code, today)
|
||||
online_data_exists = check_online_data_exists(ts_code, check_date)
|
||||
|
||||
status = '数据不完整'
|
||||
if online_check and online_data_exists:
|
||||
status += ',在线数据已更新'
|
||||
|
||||
# 先收集数据不完整的个股,不进行停牌检查
|
||||
incomplete_files.append({
|
||||
'file_name': file_name,
|
||||
'ts_code': ts_code,
|
||||
@@ -301,6 +517,7 @@ def check_market_data(online_check=Config.DEFAULT_ONLINE_CHECK):
|
||||
'online_data_exists': '是' if online_data_exists else '否' if online_data_exists is False else '未检查',
|
||||
'status': status
|
||||
})
|
||||
|
||||
# 移除单个文件的完整日志
|
||||
|
||||
# 更新进度
|
||||
@@ -311,13 +528,65 @@ def check_market_data(online_check=Config.DEFAULT_ONLINE_CHECK):
|
||||
# 显示进度条
|
||||
print(f"\r进度: [{'#' * int(progress / 2)}{' ' * (50 - int(progress / 2))}] {progress:.1f}% | 已完成: {completed}/{total} | 耗时: {elapsed:.1f}s", end='', flush=True)
|
||||
|
||||
# 添加调试信息
|
||||
logger.info(f"收集到的不完整文件数量: {len(incomplete_files)}")
|
||||
if incomplete_files:
|
||||
logger.info(f"前5个不完整文件示例: {[f['ts_code'] for f in incomplete_files[:5]]}")
|
||||
|
||||
# 进度条完成后换行
|
||||
print()
|
||||
|
||||
# 对收集到的不完整个股进行统一的停牌检查
|
||||
logger.info(f"开始对 {len(incomplete_files)} 个数据不完整的个股进行停牌检查")
|
||||
|
||||
# 创建新的列表存储经过停牌检查后的结果
|
||||
final_incomplete_files = []
|
||||
|
||||
for file_info in incomplete_files:
|
||||
ts_code = file_info['ts_code']
|
||||
latest_date = file_info['latest_date']
|
||||
|
||||
# 如果是文件内容异常,直接添加到最终列表
|
||||
if file_info['status'] == '文件内容异常':
|
||||
final_incomplete_files.append(file_info)
|
||||
continue
|
||||
|
||||
# 进行停牌检查
|
||||
is_suspended, suspend_dates, latest_suspend_start = check_stock_suspended(ts_code, check_date)
|
||||
|
||||
# 更新文件信息
|
||||
file_info['是否在停牌状态'] = '是' if is_suspended else '否' if is_suspended is not None else '检查失败'
|
||||
file_info['停牌的日期'] = suspend_dates if suspend_dates else '无'
|
||||
|
||||
if is_suspended is True:
|
||||
logger.info(f"股票 {ts_code} 当前处于停牌状态")
|
||||
|
||||
if latest_suspend_start is not None:
|
||||
# 检查最新行情是否是停盘那天
|
||||
if latest_date == latest_suspend_start:
|
||||
logger.info(f"股票 {ts_code} 的最新行情日期 {latest_date} 与停牌开始日期 {latest_suspend_start} 一致,不输出报告")
|
||||
continue # 跳过输出报告
|
||||
else:
|
||||
logger.info(f"股票 {ts_code} 的最新行情日期 {latest_date} 与停牌开始日期 {latest_suspend_start} 不一致,开始更新数据")
|
||||
# 更新数据到最新
|
||||
update_stock_data(ts_code)
|
||||
# 更新最新日期为停牌开始日期
|
||||
file_info['latest_date'] = latest_suspend_start
|
||||
# 继续输出报告,因为已经更新了数据
|
||||
elif is_suspended is None:
|
||||
logger.warning(f"股票 {ts_code} 的停牌检查失败,继续输出报告")
|
||||
|
||||
# 如果没有停牌或停牌检查失败,添加到最终列表
|
||||
final_incomplete_files.append(file_info)
|
||||
|
||||
# 更新incomplete_files为经过停牌检查后的最终列表
|
||||
incomplete_files = final_incomplete_files
|
||||
logger.info(f"停牌检查完成,剩余 {len(incomplete_files)} 个需要输出报告的不完整个股")
|
||||
|
||||
# 输出结果到CSV文件
|
||||
output_file = Config.OUTPUT_FILE
|
||||
with open(output_file, 'w', newline='', encoding='utf-8') as csvfile:
|
||||
fieldnames = ['file_name', 'ts_code', 'latest_date', 'trading_days_diff', 'online_data_exists', 'status']
|
||||
fieldnames = ['file_name', 'ts_code', 'latest_date', 'trading_days_diff', 'online_data_exists', 'status', '是否在停牌状态', '停牌的日期']
|
||||
writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
|
||||
|
||||
writer.writeheader()
|
||||
|
||||
52203
market_data_check.log
52203
market_data_check.log
File diff suppressed because it is too large
Load Diff
52043
market_data_check.log.2025-12-16
Normal file
52043
market_data_check.log.2025-12-16
Normal file
File diff suppressed because it is too large
Load Diff
@@ -1,11 +1,11 @@
|
||||
file_name,ts_code,latest_date,trading_days_diff,online_data_exists,status
|
||||
002166.SZ_daily_data.txt,002166.SZ,20251209,6,未检查,数据不完整
|
||||
002769.SZ_daily_data.txt,002769.SZ,20251203,10,未检查,数据不完整
|
||||
300068.SZ_daily_data.txt,300068.SZ,20251211,4,未检查,数据不完整
|
||||
300291.SZ_daily_data.txt,300291.SZ,20251215,2,未检查,数据不完整
|
||||
300710.SZ_daily_data.txt,300710.SZ,20251203,10,未检查,数据不完整
|
||||
600730.SH_daily_data.txt,600730.SH,20251212,3,未检查,数据不完整
|
||||
600800.SH_daily_data.txt,600800.SH,20251205,8,未检查,数据不完整
|
||||
601059.SH_daily_data.txt,601059.SH,20251119,20,未检查,数据不完整
|
||||
601198.SH_daily_data.txt,601198.SH,20251119,20,未检查,数据不完整
|
||||
601995.SH_daily_data.txt,601995.SH,20251119,20,未检查,数据不完整
|
||||
file_name,ts_code,latest_date,trading_days_diff,online_data_exists,status,是否在停牌状态,停牌的日期
|
||||
002166.SZ_daily_data.txt,002166.SZ,20251216,6,未检查,数据不完整,是,"20251210, 20251211, 20251212, 20251215, 20251216"
|
||||
002769.SZ_daily_data.txt,002769.SZ,20251216,10,未检查,数据不完整,是,"20251204, 20251205, 20251208, 20251209, 20251210, 20251211, 20251212, 20251215, 20251216"
|
||||
300068.SZ_daily_data.txt,300068.SZ,20251216,4,未检查,数据不完整,是,"20251212, 20251215, 20251216"
|
||||
300291.SZ_daily_data.txt,300291.SZ,20251216,2,未检查,数据不完整,是,20251216
|
||||
300710.SZ_daily_data.txt,300710.SZ,20251216,10,未检查,数据不完整,是,"20251204, 20251205, 20251208, 20251209, 20251210, 20251211, 20251212, 20251215, 20251216"
|
||||
600730.SH_daily_data.txt,600730.SH,20251216,3,未检查,数据不完整,是,"20251215, 20251216"
|
||||
600800.SH_daily_data.txt,600800.SH,20251216,8,未检查,数据不完整,是,"20251208, 20251209, 20251210, 20251211, 20251212, 20251215, 20251216"
|
||||
601059.SH_daily_data.txt,601059.SH,20251216,20,未检查,数据不完整,是,"20251120, 20251121, 20251124, 20251125, 20251126, 20251127, 20251128, 20251201, 20251202, 20251203, 20251204, 20251205, 20251208, 20251209, 20251210, 20251211, 20251212, 20251215, 20251216"
|
||||
601198.SH_daily_data.txt,601198.SH,20251216,20,未检查,数据不完整,是,"20251120, 20251121, 20251124, 20251125, 20251126, 20251127, 20251128, 20251201, 20251202, 20251203, 20251204, 20251205, 20251208, 20251209, 20251210, 20251211, 20251212, 20251215, 20251216"
|
||||
601995.SH_daily_data.txt,601995.SH,20251216,20,未检查,数据不完整,是,"20251120, 20251121, 20251124, 20251125, 20251126, 20251127, 20251128, 20251201, 20251202, 20251203, 20251204, 20251205, 20251208, 20251209, 20251210, 20251211, 20251212, 20251215, 20251216"
|
||||
|
Reference in New Issue
Block a user