修改排序方式和更新内容,
This commit is contained in:
BIN
__pycache__/update_tushare_totxt.cpython-310.pyc
Normal file
BIN
__pycache__/update_tushare_totxt.cpython-310.pyc
Normal file
Binary file not shown.
21
app.log
21
app.log
@@ -1,2 +1,19 @@
|
|||||||
2025-12-05 19:21:46 - INFO - 日志文件已配置: app.log
|
2025-12-06 00:19:00 - INFO - 日志文件已配置: app.log
|
||||||
2025-12-05 19:21:51 - INFO - 所有资料已保存到 D:\gp_data\code\all_stock_codes.txt
|
2025-12-06 00:19:28 - INFO - 日志文件已配置: app.log
|
||||||
|
2025-12-06 00:22:02 - INFO - 日志文件已配置: app.log
|
||||||
|
2025-12-06 00:22:14 - INFO - 所有资料已保存到 D:\gp_data\code\all_stock_codes.txt
|
||||||
|
2025-12-06 08:43:38 - INFO - 日志文件已配置: app.log
|
||||||
|
2025-12-06 08:43:44 - INFO - 所有资料已保存到 D:\gp_data\code\all_stock_codes.txt
|
||||||
|
2025-12-06 09:05:30 - INFO - 所有资料已保存到 D:\gp_data\code\all_stock_codes.txt
|
||||||
|
2025-12-06 09:22:17 - INFO - 日志文件已配置: app.log
|
||||||
|
2025-12-06 09:23:01 - INFO - 日志文件已配置: app.log
|
||||||
|
2025-12-06 09:23:51 - INFO - 日志文件已配置: app.log
|
||||||
|
2025-12-06 09:28:29 - INFO - 日志文件已配置: app.log
|
||||||
|
2025-12-06 09:33:43 - INFO - 日志文件已配置: app.log
|
||||||
|
2025-12-06 09:34:15 - INFO - 日志文件已配置: app.log
|
||||||
|
2025-12-06 09:44:03 - INFO - 日志文件已配置: app.log
|
||||||
|
2025-12-06 09:44:07 - INFO - 所有资料已保存到 D:\gp_data\code\all_stock_codes.txt
|
||||||
|
2025-12-06 17:47:22 - INFO - 日志文件已配置: app.log
|
||||||
|
2025-12-06 17:47:26 - INFO - 所有资料已保存到 D:\gp_data\code\all_stock_codes.txt
|
||||||
|
2025-12-06 20:11:44 - INFO - 日志文件已配置: app.log
|
||||||
|
2025-12-06 20:12:22 - INFO - 所有资料已保存到 D:\gp_data\code\all_stock_codes.txt
|
||||||
|
|||||||
@@ -9,26 +9,51 @@ import tushare as ts
|
|||||||
import time
|
import time
|
||||||
from functools import lru_cache
|
from functools import lru_cache
|
||||||
|
|
||||||
|
# -------------------------- 配置参数区域 --------------------------
|
||||||
|
class Config:
|
||||||
|
"""配置参数类,集中管理所有配置"""
|
||||||
|
# 日志配置
|
||||||
|
LOG_NAME = 'market_data_check'
|
||||||
|
LOG_FILE = 'market_data_check.log'
|
||||||
|
LOG_LEVEL = logging.INFO
|
||||||
|
LOG_FORMAT = '%(asctime)s - %(levelname)s - %(message)s'
|
||||||
|
LOG_CONSOLE = True
|
||||||
|
LOG_DIR = '.'
|
||||||
|
LOG_BACKUP_COUNT = 3
|
||||||
|
|
||||||
|
# Tushare配置
|
||||||
|
TUSHARE_TOKENS = [
|
||||||
|
'9343e641869058684afeadfcfe7fd6684160852e52e85332a7734c8d' # 主账户
|
||||||
|
]
|
||||||
|
|
||||||
|
# API请求频率控制
|
||||||
|
MAX_REQUESTS_PER_MINUTE = 500
|
||||||
|
|
||||||
|
# 数据目录配置
|
||||||
|
DATA_DIR = Path(r'D:\gp_data\day')
|
||||||
|
|
||||||
|
# 交易日历配置
|
||||||
|
TRADE_CALENDAR_START_YEARS = 2 # 过去2年
|
||||||
|
TRADE_CALENDAR_END_MONTHS = 1 # 未来1个月
|
||||||
|
|
||||||
|
# 输出文件配置
|
||||||
|
OUTPUT_FILE = Path('market_data_check_result.csv')
|
||||||
|
|
||||||
|
# 默认参数配置
|
||||||
|
DEFAULT_ONLINE_CHECK = False
|
||||||
|
|
||||||
# 配置日志
|
# 配置日志
|
||||||
logger = setup_logger(
|
logger = setup_logger(
|
||||||
name='market_data_check',
|
name=Config.LOG_NAME,
|
||||||
log_file='market_data_check.log',
|
log_file=Config.LOG_FILE,
|
||||||
level=logging.INFO,
|
level=Config.LOG_LEVEL,
|
||||||
log_format='%(asctime)s - %(levelname)s - %(message)s',
|
log_format=Config.LOG_FORMAT,
|
||||||
console=True,
|
console=Config.LOG_CONSOLE,
|
||||||
log_dir='.',
|
log_dir=Config.LOG_DIR,
|
||||||
backup_count=3
|
backup_count=Config.LOG_BACKUP_COUNT
|
||||||
)
|
)
|
||||||
logger.propagate = False # 避免日志消息向上传递到父记录器,防止重复输出
|
logger.propagate = False # 避免日志消息向上传递到父记录器,防止重复输出
|
||||||
|
|
||||||
# Tushare配置
|
|
||||||
TUSHARE_TOKENS = [
|
|
||||||
'9343e641869058684afeadfcfe7fd6684160852e52e85332a7734c8d' # 主账户
|
|
||||||
]
|
|
||||||
|
|
||||||
# API请求频率控制
|
|
||||||
MAX_REQUESTS_PER_MINUTE = 500
|
|
||||||
|
|
||||||
class TushareManager:
|
class TushareManager:
|
||||||
"""Tushare API管理类,处理账户轮询和请求频率控制"""
|
"""Tushare API管理类,处理账户轮询和请求频率控制"""
|
||||||
def __init__(self, tokens):
|
def __init__(self, tokens):
|
||||||
@@ -55,7 +80,7 @@ class TushareManager:
|
|||||||
self.last_request_time = current_time
|
self.last_request_time = current_time
|
||||||
|
|
||||||
# 如果请求次数超过限制,等待
|
# 如果请求次数超过限制,等待
|
||||||
if self.request_count >= MAX_REQUESTS_PER_MINUTE:
|
if self.request_count >= Config.MAX_REQUESTS_PER_MINUTE:
|
||||||
wait_time = 60 - time_since_last_request + 1
|
wait_time = 60 - time_since_last_request + 1
|
||||||
logger.info(f"请求频率过高,等待 {wait_time:.1f} 秒")
|
logger.info(f"请求频率过高,等待 {wait_time:.1f} 秒")
|
||||||
time.sleep(wait_time)
|
time.sleep(wait_time)
|
||||||
@@ -70,7 +95,7 @@ class TushareManager:
|
|||||||
self.last_request_time = current_time
|
self.last_request_time = current_time
|
||||||
|
|
||||||
# 创建Tushare管理器实例
|
# 创建Tushare管理器实例
|
||||||
tushare_manager = TushareManager(TUSHARE_TOKENS)
|
tushare_manager = TushareManager(Config.TUSHARE_TOKENS)
|
||||||
|
|
||||||
# 全局变量,用于缓存交易日历
|
# 全局变量,用于缓存交易日历
|
||||||
trade_calendar_cache = None
|
trade_calendar_cache = None
|
||||||
@@ -88,10 +113,10 @@ def get_trade_calendar():
|
|||||||
return trade_calendar_cache
|
return trade_calendar_cache
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# 计算日期范围:过去2年到未来1个月
|
# 计算日期范围:过去N年到未来M个月
|
||||||
today = datetime.datetime.now()
|
today = datetime.datetime.now()
|
||||||
start_date = (today - datetime.timedelta(days=730)).strftime('%Y%m%d') # 过去2年
|
start_date = (today - datetime.timedelta(days=365 * Config.TRADE_CALENDAR_START_YEARS)).strftime('%Y%m%d')
|
||||||
end_date = (today + datetime.timedelta(days=30)).strftime('%Y%m%d') # 未来1个月
|
end_date = (today + datetime.timedelta(days=30 * Config.TRADE_CALENDAR_END_MONTHS)).strftime('%Y%m%d')
|
||||||
|
|
||||||
pro = tushare_manager.get_pro_api()
|
pro = tushare_manager.get_pro_api()
|
||||||
tushare_manager.control_request_rate()
|
tushare_manager.control_request_rate()
|
||||||
@@ -116,6 +141,8 @@ def get_trade_calendar():
|
|||||||
def get_latest_trade_date(file_path):
|
def get_latest_trade_date(file_path):
|
||||||
"""
|
"""
|
||||||
从txt文件中获取最新的交易日期
|
从txt文件中获取最新的交易日期
|
||||||
|
注意:现在数据文件按日期降序保存,最新的交易日期在文件第一行数据(跳过表头)
|
||||||
|
|
||||||
:param file_path: 文件路径
|
:param file_path: 文件路径
|
||||||
:return: 最新交易日期字符串,如'20251204',如果文件为空返回None
|
:return: 最新交易日期字符串,如'20251204',如果文件为空返回None
|
||||||
"""
|
"""
|
||||||
@@ -125,17 +152,27 @@ def get_latest_trade_date(file_path):
|
|||||||
if len(lines) < 2: # 至少需要有表头和一行数据
|
if len(lines) < 2: # 至少需要有表头和一行数据
|
||||||
logger.warning(f"文件 {file_path} 内容不足")
|
logger.warning(f"文件 {file_path} 内容不足")
|
||||||
return None
|
return None
|
||||||
# 第二行是第一行数据(最新的交易日期)
|
|
||||||
first_data_line = lines[1].strip()
|
# 解析表头,找到trade_date列的索引
|
||||||
if not first_data_line:
|
header_line = lines[0].strip()
|
||||||
logger.warning(f"文件 {file_path} 数据行为空")
|
headers = header_line.split('\t')
|
||||||
|
if 'trade_date' not in headers:
|
||||||
|
logger.warning(f"文件 {file_path} 缺少trade_date列")
|
||||||
return None
|
return None
|
||||||
# 按制表符分割
|
trade_date_idx = headers.index('trade_date')
|
||||||
columns = first_data_line.split('\t')
|
|
||||||
if len(columns) < 2: # 至少需要有ts_code和trade_date
|
# 跳过空行,从文件开头查找第一行有效数据(最新日期)
|
||||||
logger.warning(f"文件 {file_path} 数据格式不正确")
|
for i in range(1, len(lines)): # 从第二行开始查找,跳过表头行(0)
|
||||||
return None
|
line = lines[i].strip()
|
||||||
return columns[1]
|
if line: # 找到非空行
|
||||||
|
columns = line.split('\t')
|
||||||
|
if len(columns) <= trade_date_idx: # 确保有足够的列
|
||||||
|
logger.warning(f"文件 {file_path} 数据格式不正确")
|
||||||
|
return None
|
||||||
|
return columns[trade_date_idx]
|
||||||
|
# 如果没有找到有效数据行
|
||||||
|
logger.warning(f"文件 {file_path} 无有效数据行")
|
||||||
|
return None
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"读取文件 {file_path} 时出错: {str(e)}")
|
logger.error(f"读取文件 {file_path} 时出错: {str(e)}")
|
||||||
return None
|
return None
|
||||||
@@ -199,7 +236,7 @@ def check_online_data_exists(ts_code, trade_date):
|
|||||||
logger.error(f"查询在线数据失败 {ts_code} {trade_date}: {str(e)}")
|
logger.error(f"查询在线数据失败 {ts_code} {trade_date}: {str(e)}")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def check_market_data(online_check=False):
|
def check_market_data(online_check=Config.DEFAULT_ONLINE_CHECK):
|
||||||
"""
|
"""
|
||||||
检查所有行情数据文件的完整性
|
检查所有行情数据文件的完整性
|
||||||
|
|
||||||
@@ -207,7 +244,7 @@ def check_market_data(online_check=False):
|
|||||||
online_check: 是否进行在线数据检查,默认False
|
online_check: 是否进行在线数据检查,默认False
|
||||||
"""
|
"""
|
||||||
# 设置数据目录
|
# 设置数据目录
|
||||||
data_dir = Path(r'D:\gp_data\day')
|
data_dir = Config.DATA_DIR
|
||||||
|
|
||||||
# 获取当天日期(格式:YYYYMMDD)
|
# 获取当天日期(格式:YYYYMMDD)
|
||||||
today = datetime.datetime.now().strftime('%Y%m%d')
|
today = datetime.datetime.now().strftime('%Y%m%d')
|
||||||
@@ -278,7 +315,7 @@ def check_market_data(online_check=False):
|
|||||||
print()
|
print()
|
||||||
|
|
||||||
# 输出结果到CSV文件
|
# 输出结果到CSV文件
|
||||||
output_file = Path('market_data_check_result.csv')
|
output_file = Config.OUTPUT_FILE
|
||||||
with open(output_file, 'w', newline='', encoding='utf-8') as csvfile:
|
with open(output_file, 'w', newline='', encoding='utf-8') as csvfile:
|
||||||
fieldnames = ['file_name', 'ts_code', 'latest_date', 'trading_days_diff', 'online_data_exists', 'status']
|
fieldnames = ['file_name', 'ts_code', 'latest_date', 'trading_days_diff', 'online_data_exists', 'status']
|
||||||
writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
|
writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
|
||||||
|
|||||||
52049
market_data_check.log
52049
market_data_check.log
File diff suppressed because it is too large
Load Diff
12
market_data_check.log.2025-12-05
Normal file
12
market_data_check.log.2025-12-05
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
2025-12-05 19:17:45 - INFO - 日志文件已配置: market_data_check.log
|
||||||
|
2025-12-05 19:17:45 - INFO - 开始检查行情数据完整性,当前日期:20251205
|
||||||
|
2025-12-05 19:17:45 - INFO - 成功获取并缓存交易日历: 20231206至20260104
|
||||||
|
2025-12-05 19:17:50 - INFO - 检查完成,共检查 5192 个文件
|
||||||
|
2025-12-05 19:17:50 - INFO - 发现 5192 个未更新到最新的数据文件
|
||||||
|
2025-12-05 19:17:50 - INFO - 检查结果已输出到:market_data_check_result.csv
|
||||||
|
2025-12-05 20:24:53 - INFO - 日志文件已配置: market_data_check.log
|
||||||
|
2025-12-05 20:24:53 - INFO - 开始检查行情数据完整性,当前日期:20251205
|
||||||
|
2025-12-05 20:24:58 - INFO - 成功获取并缓存交易日历: 20231206至20260104
|
||||||
|
2025-12-05 20:25:03 - INFO - 检查完成,共检查 5196 个文件
|
||||||
|
2025-12-05 20:25:03 - INFO - 发现 2620 个未更新到最新的数据文件
|
||||||
|
2025-12-05 20:25:03 - INFO - 检查结果已输出到:market_data_check_result.csv
|
||||||
File diff suppressed because it is too large
Load Diff
@@ -1,6 +0,0 @@
|
|||||||
file_name,ts_code,latest_date,trading_days_diff,online_data_exists,status
|
|
||||||
000001.SZ_daily_data.txt,000001.SZ,20250806,82,否,数据不完整
|
|
||||||
000002.SZ_daily_data.txt,000002.SZ,20251204,2,否,数据不完整
|
|
||||||
000004.SZ_daily_data.txt,000004.SZ,20250806,82,否,数据不完整
|
|
||||||
000006.SZ_daily_data.txt,000006.SZ,20251204,2,否,数据不完整
|
|
||||||
000007.SZ_daily_data.txt,000007.SZ,20250806,82,否,数据不完整
|
|
||||||
|
@@ -4,6 +4,7 @@ import time
|
|||||||
import logging
|
import logging
|
||||||
import threading
|
import threading
|
||||||
import socket
|
import socket
|
||||||
|
from datetime import timedelta
|
||||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
import tushare as ts
|
import tushare as ts
|
||||||
@@ -528,6 +529,9 @@ class DataDownloader:
|
|||||||
|
|
||||||
def save_to_txt(self, data: pd.DataFrame, filename: str) -> bool:
|
def save_to_txt(self, data: pd.DataFrame, filename: str) -> bool:
|
||||||
try:
|
try:
|
||||||
|
# 按交易日期降序排序,确保最新交易日排在最前面
|
||||||
|
if 'trade_date' in data.columns:
|
||||||
|
data = data.sort_values('trade_date', ascending=False)
|
||||||
data.to_csv(filename, index=False, sep='\t', encoding='utf-8')
|
data.to_csv(filename, index=False, sep='\t', encoding='utf-8')
|
||||||
# logging.info(f"数据已保存到 {filename}")
|
# logging.info(f"数据已保存到 {filename}")
|
||||||
return True
|
return True
|
||||||
@@ -538,10 +542,68 @@ class DataDownloader:
|
|||||||
def process_stock_code(self, code, progress_queue=None): # 修改参数默认值为None
|
def process_stock_code(self, code, progress_queue=None): # 修改参数默认值为None
|
||||||
pro = self.account_manager.get_next_account()
|
pro = self.account_manager.get_next_account()
|
||||||
try:
|
try:
|
||||||
df = self.fetch_data_with_retry(pro.daily, ts_code=code)
|
output_file = os.path.join(Config.OUTPUT_DIR, f"{code}_daily_data.txt")
|
||||||
if df is not None:
|
|
||||||
output_file = os.path.join(Config.OUTPUT_DIR, f"{code}_daily_data.txt")
|
# 检查是否存在现有数据文件
|
||||||
self.save_to_txt(df, output_file)
|
if os.path.exists(output_file):
|
||||||
|
# 读取现有数据,获取最新的交易日期
|
||||||
|
existing_df = self.read_from_txt(output_file)
|
||||||
|
if existing_df is not None and not existing_df.empty:
|
||||||
|
# 获取最新交易日期
|
||||||
|
if 'trade_date' in existing_df.columns:
|
||||||
|
# 由于read_from_txt会将trade_date转换为datetime格式
|
||||||
|
# 确保现有数据的trade_date列是datetime格式
|
||||||
|
if not pd.api.types.is_datetime64_any_dtype(existing_df['trade_date']):
|
||||||
|
existing_df['trade_date'] = pd.to_datetime(existing_df['trade_date'], format='%Y%m%d')
|
||||||
|
|
||||||
|
# 获取最新交易日期
|
||||||
|
latest_date_dt = existing_df['trade_date'].max()
|
||||||
|
# 计算下一个交易日的起始日期(避免重复获取同一天数据)
|
||||||
|
next_date_dt = latest_date_dt + timedelta(days=1)
|
||||||
|
next_date = next_date_dt.strftime('%Y%m%d')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# 获取最新日期之后的数据
|
||||||
|
df = self.fetch_data_with_retry(pro.daily, ts_code=code, start_date=next_date)
|
||||||
|
|
||||||
|
if df is not None and not df.empty:
|
||||||
|
|
||||||
|
# 将新数据的trade_date列转换为datetime格式,以便合并
|
||||||
|
df['trade_date'] = pd.to_datetime(df['trade_date'], format='%Y%m%d')
|
||||||
|
|
||||||
|
# 合并现有数据和新数据
|
||||||
|
combined_df = pd.concat([existing_df, df], ignore_index=True)
|
||||||
|
|
||||||
|
# 去重,避免重复数据
|
||||||
|
combined_df = combined_df.drop_duplicates(subset=['trade_date', 'ts_code'], keep='last')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# 按交易日期降序排序,最新交易日排在最前面
|
||||||
|
combined_df = combined_df.sort_values('trade_date', ascending=False)
|
||||||
|
|
||||||
|
# 将trade_date转换回字符串格式保存
|
||||||
|
combined_df['trade_date'] = combined_df['trade_date'].dt.strftime('%Y%m%d')
|
||||||
|
|
||||||
|
# 保存合并后的数据
|
||||||
|
self.save_to_txt(combined_df, output_file)
|
||||||
|
else:
|
||||||
|
# 如果现有数据没有 trade_date 列,重新获取全部数据
|
||||||
|
df = self.fetch_data_with_retry(pro.daily, ts_code=code)
|
||||||
|
if df is not None:
|
||||||
|
self.save_to_txt(df, output_file)
|
||||||
|
else:
|
||||||
|
# 现有数据为空,重新获取全部数据
|
||||||
|
df = self.fetch_data_with_retry(pro.daily, ts_code=code)
|
||||||
|
if df is not None:
|
||||||
|
self.save_to_txt(df, output_file)
|
||||||
|
else:
|
||||||
|
# 文件不存在,获取全部数据
|
||||||
|
df = self.fetch_data_with_retry(pro.daily, ts_code=code)
|
||||||
|
if df is not None:
|
||||||
|
self.save_to_txt(df, output_file)
|
||||||
|
|
||||||
if progress_queue is not None: # 添加判断
|
if progress_queue is not None: # 添加判断
|
||||||
progress_queue.put(1)
|
progress_queue.put(1)
|
||||||
except (ConnectionError, TimeoutError) as e:
|
except (ConnectionError, TimeoutError) as e:
|
||||||
|
|||||||
Reference in New Issue
Block a user