Update data: use the suspension check to tell whether a stock's data is actually up to date, and use the time of day to decide whether the current date should be treated as the latest expected trading date.

2025-12-17 01:45:12 +08:00
parent 9fc2f4345a
commit 9b8096cc77
5 changed files with 52491 additions and 52062 deletions
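In short, the diff below adds two rules: the expected latest trading date now depends on the time of day, and a suspended stock whose newest stored row is the suspension start date is no longer reported. A condensed sketch (the function names here are illustrative, not identifiers from the changed file):

import datetime

def expected_check_date(now: datetime.datetime) -> str:
    # Before 16:00 the current day's bar may not be published yet,
    # so the completeness check expects the previous calendar day instead.
    if now.hour < 16:
        now = now - datetime.timedelta(days=1)
    return now.strftime('%Y%m%d')

def should_report(latest_date: str, is_suspended: bool, latest_suspend_start) -> bool:
    # A suspended stock whose newest stored row is exactly the suspension
    # start date is as fresh as it can be, so it is left out of the report.
    return not (is_suspended and latest_date == latest_suspend_start)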

Binary file not shown.


@@ -236,6 +236,209 @@ def check_online_data_exists(ts_code, trade_date):
logger.error(f"查询在线数据失败 {ts_code} {trade_date}: {str(e)}")
return None
def get_suspend_info(ts_code, start_date, end_date):
    """
    Fetch suspension records for a stock.
    :param ts_code: stock code, e.g. '688800.SH'
    :param start_date: start date, YYYYMMDD format
    :param end_date: end date, YYYYMMDD format
    :return: DataFrame of suspension records, or None if the query fails
    """
    try:
        pro = tushare_manager.get_pro_api()
        tushare_manager.control_request_rate()
        # Query suspension records within the given date range
        df = pro.suspend_d(ts_code=ts_code, start_date=start_date, end_date=end_date)
        if df is None or df.empty:
            logger.info(f"No suspension records found for {ts_code} between {start_date} and {end_date}")
            return None
        else:
            logger.info(f"Found {len(df)} suspension records for {ts_code} between {start_date} and {end_date}")
            return df
    except Exception as e:
        logger.error(f"Failed to query suspension info for {ts_code} {start_date}-{end_date}: {str(e)}")
        return None
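# Note: check_stock_suspended below tolerates two record shapes from suspend_d,
# per-day rows (trade_date + suspend_type, where 'S' marks a suspended day) and
# range rows (suspend_date + resume_date), rather than assuming a single Tushare schema.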
def check_stock_suspended(ts_code, check_date):
    """
    Check whether a stock is suspended on a given date.
    :param ts_code: stock code, e.g. '688800.SH'
    :param check_date: date to check, YYYYMMDD format
    :return: (is_suspended, suspend_dates, latest_suspend_start) -
    is_suspended: whether the stock is suspended; suspend_dates: the suspension date ranges; latest_suspend_start: the most recent suspension start date
    """
    try:
        # Widen the query window by roughly one month on each side so the suspension history is complete
        check_dt = datetime.datetime.strptime(check_date, '%Y%m%d')
        start_date = (check_dt - datetime.timedelta(days=30)).strftime('%Y%m%d')
        end_date = (check_dt + datetime.timedelta(days=30)).strftime('%Y%m%d')
        # Fetch suspension records
        suspend_df = get_suspend_info(ts_code, start_date, end_date)
        if suspend_df is None:
            return False, "", None  # no suspension records found; assume not suspended
        suspend_dates_list = []
        is_suspended = False
        latest_suspend_start = None
        # Check whether the given date falls inside any suspension period
        for _, row in suspend_df.iterrows():
            # Per-day record shape (has trade_date and suspend_type)
            if 'trade_date' in row and 'suspend_type' in row:
                trade_date = row['trade_date']
                suspend_type = row['suspend_type']
                # Track the most recent suspension date
                if latest_suspend_start is None or trade_date > latest_suspend_start:
                    latest_suspend_start = trade_date
                # suspend_type 'S' means the stock was suspended that day
                if suspend_type == 'S':
                    # Record the suspension date
                    suspend_dates_list.append(trade_date)
                    # Is the date being checked itself a suspension day?
                    if trade_date == check_date:
                        logger.info(f"Stock {ts_code} is suspended on {check_date}")
                        is_suspended = True
                continue
            # Legacy range shape (has suspend_date and resume_date)
            if 'suspend_date' not in row or 'resume_date' not in row:
                # The row may still be valid data in another shape returned by the API
                if 'ts_code' in row and 'trade_date' in row:
                    # Valid per-day suspension record, just with different field names
                    trade_date = row['trade_date']
                    logger.info(f"Stock {ts_code} is suspended on {trade_date}")
                    if trade_date == check_date:
                        is_suspended = True
                    suspend_dates_list.append(trade_date)
                    latest_suspend_start = trade_date
                else:
                    logger.warning(f"Suspension record is missing required fields: {row}")
                continue
            suspend_start = row['suspend_date']
            suspend_end = row['resume_date']
            # Track the most recent suspension start date
            if latest_suspend_start is None or suspend_start > latest_suspend_start:
                latest_suspend_start = suspend_start
            # A resume date of None or 00000000 means the stock has not resumed trading yet
            if not suspend_end or suspend_end == '00000000':
                suspend_end = end_date  # fall back to the query end date
            # Record the suspension date range
            suspend_dates_list.append(f"{suspend_start}-{suspend_end}")
            # Check whether the date falls inside this suspension period
            if suspend_start <= check_date <= suspend_end:
                logger.info(f"Stock {ts_code} is suspended on {check_date} (from {suspend_start} to {suspend_end})")
                is_suspended = True
        # Join the suspension date ranges
        suspend_dates = ", ".join(suspend_dates_list)
        if not is_suspended:
            logger.info(f"Stock {ts_code} is not suspended on {check_date}")
        return is_suspended, suspend_dates, latest_suspend_start
    except Exception as e:
        logger.error(f"Failed to check suspension status for {ts_code} {check_date}: {str(e)}")
        return None, "", None
def update_stock_data(ts_code):
    """
    Update the daily market data for a stock.
    :param ts_code: stock code, e.g. '688800.SH'
    :return: True if the update succeeded, False otherwise
    """
    try:
        logger.info(f"Starting market data update for stock {ts_code}")
        # Build the output file path
        output_file = Config.DATA_DIR / f"{ts_code}_daily_data.txt"
        # Get the Tushare API instance
        pro = tushare_manager.get_pro_api()
        # Check whether a data file already exists
        if output_file.exists():
            # Read the existing data to find the latest trade date
            try:
                # Read the data the same way update_tushare_totxt.py does
                df = pd.read_csv(output_file, sep='\t', encoding='utf-8')
                if not df.empty and 'trade_date' in df.columns:
                    # Latest trade date on file
                    latest_date = df['trade_date'].max()
                    # Start from the next calendar day to avoid re-fetching the same day
                    latest_dt = datetime.datetime.strptime(str(latest_date), '%Y%m%d')
                    next_dt = latest_dt + datetime.timedelta(days=1)
                    next_date = next_dt.strftime('%Y%m%d')
                    logger.info(f"Stock {ts_code} latest date on file: {latest_date}; fetching data from {next_date} onwards")
                    # Throttle the request rate
                    tushare_manager.control_request_rate()
                    # Fetch data after the latest date on file
                    new_df = pro.daily(ts_code=ts_code, start_date=next_date)
                    if new_df is not None and not new_df.empty:
                        logger.info(f"Fetched {len(new_df)} new rows for {ts_code}")
                        # Merge the existing data with the new data
                        combined_df = pd.concat([df, new_df], ignore_index=True)
                        # Drop duplicates to avoid repeated rows
                        combined_df = combined_df.drop_duplicates(subset=['trade_date', 'ts_code'], keep='last')
                        # Sort by trade date descending so the most recent day comes first
                        combined_df = combined_df.sort_values('trade_date', ascending=False)
                        # Save the merged data
                        combined_df.to_csv(output_file, index=False, sep='\t', encoding='utf-8')
                        logger.info(f"Market data for stock {ts_code} updated successfully")
                        return True
                    else:
                        logger.info(f"No new data available for stock {ts_code}")
                        return True
                else:
                    logger.warning(f"File {output_file} looks malformed; refetching the full history")
            except Exception as e:
                logger.error(f"Failed to read file {output_file}: {str(e)}")
        # The file does not exist or could not be read: fetch the full history
        logger.info(f"Fetching the full market data history for stock {ts_code}")
        # Throttle the request rate
        tushare_manager.control_request_rate()
        # Fetch all data
        full_df = pro.daily(ts_code=ts_code)
        if full_df is not None and not full_df.empty:
            # Sort by trade date descending so the most recent day comes first
            full_df = full_df.sort_values('trade_date', ascending=False)
            # Save the data
            full_df.to_csv(output_file, index=False, sep='\t', encoding='utf-8')
            logger.info(f"Market data for stock {ts_code} fetched and saved successfully")
            return True
        else:
            logger.warning(f"Could not fetch market data for stock {ts_code}")
            return False
    except Exception as e:
        logger.error(f"Failed to update data for stock {ts_code}: {str(e)}")
        return False
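# Example (illustrative): update_stock_data('002166.SZ') appends any rows newer than the
# file's current maximum trade_date, de-duplicates on (trade_date, ts_code), rewrites the
# tab-separated file newest-first, and returns True; if the file is missing it refetches
# the full history instead.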
def check_market_data(online_check=Config.DEFAULT_ONLINE_CHECK):
"""
检查所有行情数据文件的完整性
@@ -246,9 +449,19 @@ def check_market_data(online_check=Config.DEFAULT_ONLINE_CHECK):
    # Data directory
    data_dir = Config.DATA_DIR
    # Get today's date in YYYYMMDD format
    today = datetime.datetime.now().strftime('%Y%m%d')
    logger.info(f"Starting market data completeness check, current date: {today}")
    # Get the current time
    now = datetime.datetime.now()
    today = now.strftime('%Y%m%d')
    # Time-of-day rule: if it is earlier than 16:00 the check date is the previous day, otherwise today
    if now.hour < 16:
        # Get the previous day's date
        yesterday = now - datetime.timedelta(days=1)
        check_date = yesterday.strftime('%Y%m%d')
        logger.info(f"Current time {now.strftime('%Y-%m-%d %H:%M:%S')} is before 16:00; check date adjusted to the previous day: {check_date}")
    else:
        check_date = today
    logger.info(f"Starting market data completeness check, check date: {check_date}")
    # List all txt files
    all_files = list(data_dir.glob('*.txt'))
@@ -278,21 +491,24 @@ def check_market_data(online_check=Config.DEFAULT_ONLINE_CHECK):
                'latest_date': 'N/A',
                'trading_days_diff': 'N/A',
                'online_data_exists': 'N/A',
                'status': '文件内容异常'
                'status': '文件内容异常',
                '是否在停牌状态': 'N/A',
                '停牌的日期': 'N/A'
            })
        elif latest_date != today:
        elif latest_date != check_date:
            # Difference in trading days
            trading_days_diff = calculate_trading_days_diff(latest_date, today)
            trading_days_diff = calculate_trading_days_diff(latest_date, check_date)
            # Check whether data exists online
            online_data_exists = None
            if online_check:
                online_data_exists = check_online_data_exists(ts_code, today)
                online_data_exists = check_online_data_exists(ts_code, check_date)
            status = '数据不完整'
            if online_check and online_data_exists:
                status += ',在线数据已更新'
            # Collect the incomplete stocks first; the suspension check happens later
            incomplete_files.append({
                'file_name': file_name,
                'ts_code': ts_code,
@@ -301,6 +517,7 @@ def check_market_data(online_check=Config.DEFAULT_ONLINE_CHECK):
                'online_data_exists': '是' if online_data_exists else '否' if online_data_exists is False else '未检查',
                'status': status
            })
        # The per-file completion log was removed
        # Update progress
@@ -311,13 +528,65 @@ def check_market_data(online_check=Config.DEFAULT_ONLINE_CHECK):
        # Render the progress bar
        print(f"\rProgress: [{'#' * int(progress / 2)}{' ' * (50 - int(progress / 2))}] {progress:.1f}% | done: {completed}/{total} | elapsed: {elapsed:.1f}s", end='', flush=True)
    # Debug info
    logger.info(f"Number of incomplete files collected: {len(incomplete_files)}")
    if incomplete_files:
        logger.info(f"First 5 incomplete files, for example: {[f['ts_code'] for f in incomplete_files[:5]]}")
    # Move to a new line once the progress bar is done
    print()
    # Run the suspension check over all stocks with incomplete data in one pass
    logger.info(f"Starting the suspension check for {len(incomplete_files)} stocks with incomplete data")
    # New list holding the results after the suspension check
    final_incomplete_files = []
    for file_info in incomplete_files:
        ts_code = file_info['ts_code']
        latest_date = file_info['latest_date']
        # Malformed files go straight to the final list
        if file_info['status'] == '文件内容异常':
            final_incomplete_files.append(file_info)
            continue
        # Run the suspension check
        is_suspended, suspend_dates, latest_suspend_start = check_stock_suspended(ts_code, check_date)
        # Update the file info
        file_info['是否在停牌状态'] = '是' if is_suspended else '否' if is_suspended is not None else '检查失败'
        file_info['停牌的日期'] = suspend_dates if suspend_dates else ''
        if is_suspended is True:
            logger.info(f"Stock {ts_code} is currently suspended")
            if latest_suspend_start is not None:
                # Check whether the newest row on file is the suspension start date
                if latest_date == latest_suspend_start:
                    logger.info(f"Stock {ts_code}: latest data date {latest_date} matches the suspension start date {latest_suspend_start}; not reporting")
                    continue  # skip the report entry
                else:
                    logger.info(f"Stock {ts_code}: latest data date {latest_date} differs from the suspension start date {latest_suspend_start}; updating data")
                    # Bring the data up to date
                    update_stock_data(ts_code)
                    # Set the latest date to the suspension start date
                    file_info['latest_date'] = latest_suspend_start
                    # Still report it, since the data has been updated
        elif is_suspended is None:
            logger.warning(f"Suspension check failed for stock {ts_code}; reporting it anyway")
        # Not suspended, or the suspension check failed: add it to the final list
        final_incomplete_files.append(file_info)
    # Replace incomplete_files with the list produced by the suspension check
    incomplete_files = final_incomplete_files
    logger.info(f"Suspension check finished; {len(incomplete_files)} incomplete stocks left to report")
    # Write the results to a CSV file
    output_file = Config.OUTPUT_FILE
    with open(output_file, 'w', newline='', encoding='utf-8') as csvfile:
        fieldnames = ['file_name', 'ts_code', 'latest_date', 'trading_days_diff', 'online_data_exists', 'status']
        fieldnames = ['file_name', 'ts_code', 'latest_date', 'trading_days_diff', 'online_data_exists', 'status', '是否在停牌状态', '停牌的日期']
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()

File diff suppressed because it is too large.

File diff suppressed because it is too large.


@@ -1,11 +1,11 @@
file_name,ts_code,latest_date,trading_days_diff,online_data_exists,status
002166.SZ_daily_data.txt,002166.SZ,20251209,6,未检查,数据不完整
002769.SZ_daily_data.txt,002769.SZ,20251203,10,未检查,数据不完整
300068.SZ_daily_data.txt,300068.SZ,20251211,4,未检查,数据不完整
300291.SZ_daily_data.txt,300291.SZ,20251215,2,未检查,数据不完整
300710.SZ_daily_data.txt,300710.SZ,20251203,10,未检查,数据不完整
600730.SH_daily_data.txt,600730.SH,20251212,3,未检查,数据不完整
600800.SH_daily_data.txt,600800.SH,20251205,8,未检查,数据不完整
601059.SH_daily_data.txt,601059.SH,20251119,20,未检查,数据不完整
601198.SH_daily_data.txt,601198.SH,20251119,20,未检查,数据不完整
601995.SH_daily_data.txt,601995.SH,20251119,20,未检查,数据不完整
file_name,ts_code,latest_date,trading_days_diff,online_data_exists,status,是否在停牌状态,停牌的日期
002166.SZ_daily_data.txt,002166.SZ,20251216,6,未检查,数据不完整,,"20251210, 20251211, 20251212, 20251215, 20251216"
002769.SZ_daily_data.txt,002769.SZ,20251216,10,未检查,数据不完整,,"20251204, 20251205, 20251208, 20251209, 20251210, 20251211, 20251212, 20251215, 20251216"
300068.SZ_daily_data.txt,300068.SZ,20251216,4,未检查,数据不完整,,"20251212, 20251215, 20251216"
300291.SZ_daily_data.txt,300291.SZ,20251216,2,未检查,数据不完整,,20251216
300710.SZ_daily_data.txt,300710.SZ,20251216,10,未检查,数据不完整,,"20251204, 20251205, 20251208, 20251209, 20251210, 20251211, 20251212, 20251215, 20251216"
600730.SH_daily_data.txt,600730.SH,20251216,3,未检查,数据不完整,,"20251215, 20251216"
600800.SH_daily_data.txt,600800.SH,20251216,8,未检查,数据不完整,,"20251208, 20251209, 20251210, 20251211, 20251212, 20251215, 20251216"
601059.SH_daily_data.txt,601059.SH,20251216,20,未检查,数据不完整,,"20251120, 20251121, 20251124, 20251125, 20251126, 20251127, 20251128, 20251201, 20251202, 20251203, 20251204, 20251205, 20251208, 20251209, 20251210, 20251211, 20251212, 20251215, 20251216"
601198.SH_daily_data.txt,601198.SH,20251216,20,未检查,数据不完整,,"20251120, 20251121, 20251124, 20251125, 20251126, 20251127, 20251128, 20251201, 20251202, 20251203, 20251204, 20251205, 20251208, 20251209, 20251210, 20251211, 20251212, 20251215, 20251216"
601995.SH_daily_data.txt,601995.SH,20251216,20,未检查,数据不完整,,"20251120, 20251121, 20251124, 20251125, 20251126, 20251127, 20251128, 20251201, 20251202, 20251203, 20251204, 20251205, 20251208, 20251209, 20251210, 20251211, 20251212, 20251215, 20251216"
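For reference, a minimal way to load the regenerated report (pandas here is an assumption about the reader's tooling, and the file path is illustrative; the script writes to Config.OUTPUT_FILE):

import pandas as pd

report = pd.read_csv('incomplete_report.csv')  # illustrative path; see Config.OUTPUT_FILE
# The two new columns sit at the end of each row
print(report[['ts_code', 'latest_date', '是否在停牌状态', '停牌的日期']])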