Commit bd8b72f3 authored by 李宗熹

舍弃fund_info全加载

parent 946057e1
...@@ -299,6 +299,53 @@ def get_tamp_nav(fund, start_date, rollback=False, invest_type=2): ...@@ -299,6 +299,53 @@ def get_tamp_nav(fund, start_date, rollback=False, invest_type=2):
return df return df
def get_nav(fund, start_date, rollback=False, invest_type='private'):
    """Return the net-asset-value table of one fund from ``start_date`` onward.

    Args:
        fund (str): Fund ID. Matched against ``ts_code`` in ``public_fund_nav``
            for public funds and against ``fund_id`` in ``fund_nav`` otherwise.
        start_date (datetime): First date to keep. It is compared directly
            with pandas timestamps, so it must be datetime-like.
        rollback (bool): When True and ``start_date`` lies strictly between
            the first and last published dates, move ``start_date`` back to
            the latest published date on or before it.
        invest_type (str): ``'public'`` reads ``public_fund_nav``; any other
            value reads ``fund_nav`` (where ``cumulative_nav`` is exposed as
            ``adj_nav``).

    Returns:
        DataFrame | None: Table indexed by ``end_date`` (ascending, one row
        per date) with columns ``fund_id`` and ``adj_nav``; ``None`` when the
        query yields no complete row.
    """
    # NOTE(review): `fund` is interpolated straight into the SQL text. If it
    # can ever come from user input this is injectable -- switch to bound
    # parameters once the session API is confirmed (presumably SQLAlchemy).
    if invest_type == 'public':
        sql = "SELECT ts_code, end_date, adj_nav FROM public_fund_nav " \
              "WHERE ts_code='{}'".format(fund)
    else:
        sql = "SELECT fund_id, price_date, cumulative_nav FROM fund_nav " \
              "WHERE fund_id='{}'".format(fund)

    with TAMP_SQL(tamp_fund_engine) as tamp_product:
        data = tamp_product.session.execute(sql).fetchall()

    # Both queries return (id, date, nav) in that order, so the final column
    # names can be assigned directly instead of renaming afterwards.
    df = pd.DataFrame(list(data), columns=['fund_id', 'end_date', 'adj_nav']).dropna(how='any')
    if df.empty:
        return None

    df['end_date'] = pd.to_datetime(df['end_date'])
    if rollback and df['end_date'].min() < start_date < df['end_date'].max():
        # The guard above guarantees at least one published date <= start_date,
        # so pick it directly rather than stepping back one day at a time
        # (which never terminates if start_date is not aligned to a published
        # timestamp).
        start_date = df.loc[df['end_date'] <= start_date, 'end_date'].max()

    # Chain non-inplace operations: the filtered frame is a slice of `df`, and
    # in-place mutation of a slice triggers pandas chained-assignment warnings.
    df = df[df['end_date'] >= start_date].drop_duplicates(subset='end_date', keep='first')
    return df.set_index('end_date').sort_index(ascending=True)
def get_risk_level(substrategy): def get_risk_level(substrategy):
"""获取风险类型 """获取风险类型
...@@ -439,19 +486,20 @@ class PortfolioDiagnose(object): ...@@ -439,19 +486,20 @@ class PortfolioDiagnose(object):
""" """
# 获取原始投资组合的第一支基金的净值表 # 获取原始投资组合的第一支基金的净值表
prod = get_tamp_nav(self.portfolio[0], self.start_date, invest_type=self.portfolio_dict[self.portfolio[0]]) prod = get_tamp_nav(self.portfolio[0], self.start_date, invest_type=self.portfolio_dict[self.portfolio[0]])
fund_info = get_fund_info(self.end_date, invest_type=self.invest_type) # fund_info = get_fund_info(self.end_date, invest_type=self.invest_type)
# while prod is None or prod.index[-1] - prod.index[0] < 0.6 * (self.end_date - self.start_date): # while prod is None or prod.index[-1] - prod.index[0] < 0.6 * (self.end_date - self.start_date):
while prod is None: while prod is None:
# 获取的净值表为空时首先考虑基金净值数据不足半年,查找同一基金经理下的相同二级策略的基金ID作替换 # 获取的净值表为空时首先考虑基金净值数据不足半年,查找同一基金经理下的相同二级策略的基金ID作替换
fund_info = get_fund_info(self.portfolio[0], self.end_date, self.invest_type)
result = fund_info[fund_info['fund_id'] == self.portfolio[0]] result = fund_info[fund_info['fund_id'] == self.portfolio[0]]
if result.empty: if result.empty:
break break
manager = str(result['manager'].values) manager = str(result['manager'].values)
strategy = result['substrategy'].values strategy = result['substrategy'].values[0]
print('基金id:', self.portfolio[0], '基金经理: ', manager, '策略: ', strategy)
replaced_fund = replace_fund(manager, strategy, fund_rank) replaced_fund = replace_fund(manager, strategy, fund_rank)
print('替换基金:', replaced_fund)
if replaced_fund: if replaced_fund:
# 替换基金数据非空则记录替换的基金对 # 替换基金数据非空则记录替换的基金对
...@@ -604,7 +652,6 @@ class PortfolioDiagnose(object): ...@@ -604,7 +652,6 @@ class PortfolioDiagnose(object):
return prod return prod
def optimize(self, ): def optimize(self, ):
import time import time
start = time.time() start = time.time()
...@@ -672,7 +719,7 @@ class PortfolioDiagnose(object): ...@@ -672,7 +719,7 @@ class PortfolioDiagnose(object):
for i in range(1, max_len): for i in range(1, max_len):
proposal_fund_combinations = list(combinations(candidate_funds, r=i)) proposal_fund_combinations = list(combinations(candidate_funds, r=i))
for proposal_funds in proposal_fund_combinations: for proposal_funds in proposal_fund_combinations:
drop_funds = list(set(candidate_funds) - set(proposal_funds)) drop_funds = list(set(candidate_funds) - set(proposal_funds) - set(self.replace_pair.values()))
temp = prod.drop(columns=drop_funds, axis=1) temp = prod.drop(columns=drop_funds, axis=1)
mu = [search_rank(fund_rank, x, 'annual_return') for x in temp.columns] mu = [search_rank(fund_rank, x, 'annual_return') for x in temp.columns]
...@@ -1310,15 +1357,15 @@ class PortfolioDiagnose(object): ...@@ -1310,15 +1357,15 @@ class PortfolioDiagnose(object):
compare_data.append(com_data) compare_data.append(com_data)
return compare_data return compare_data
# portfolio = ['HF00002JJ2', 'HF00005DBQ', 'HF0000681Q', 'HF00006693', 'HF00006AZF', 'HF00006BGS'] # portfolio = {'HF00002JJ2':2, 'HF00005DBQ':2, 'HF0000681Q':2, 'HF00006693':2, 'HF00006AZF':2, 'HF00006BGS':2}
# portfolio_diagnose = PortfolioDiagnose(client_type=1, portfolio=portfolio, invest_amount=10000000) # portfolio_diagnose = PortfolioDiagnose(client_type=1, portfolio=portfolio, invest_amount=10000000)
# portfolio_diagnose.optimize() # portfolio_diagnose.optimize()
# if __name__ == '__main__': # if __name__ == '__main__':
# print(portfolio_diagnose.single_fund_radar()) # print(portfolio_diagnose.single_fund_radar())
# print(portfolio_diagnose.propose_fund_radar()) # print(portfolio_diagnose.propose_fund_radar())
# print(portfolio_diagnose.old_portfolio_evaluation()) # print(portfolio_diagnose.old_portfolio_evaluation())
# print('旧组合相关性:', portfolio_diagnose.old_correlation) # print('旧组合相关性:', portfolio_diagnose.old_correlation)
# print('新组合相关性:', portfolio_diagnose.new_correlation) # print('新组合相关性:', portfolio_diagnose.new_correlation)
# print('旧组合个基评价:', portfolio_diagnose.old_portfolio_evaluation()) # print('旧组合个基评价:', portfolio_diagnose.old_portfolio_evaluation())
# print('新组合个基评价:', portfolio_diagnose.propose_fund_evaluation()) # print('新组合个基评价:', portfolio_diagnose.propose_fund_evaluation())
# print(portfolio_diagnose.single_evaluation(fund_id='HF0000681Q')) # print(portfolio_diagnose.single_evaluation(fund_id='HF0000681Q'))
\ No newline at end of file \ No newline at end of file
...@@ -12,48 +12,42 @@ from app.api.engine import tamp_fund_engine, TAMP_SQL, tamp_product_engine ...@@ -12,48 +12,42 @@ from app.api.engine import tamp_fund_engine, TAMP_SQL, tamp_product_engine
from app.utils.week_evaluation import * from app.utils.week_evaluation import *
# con = pymysql.connect(host='tamper.mysql.polardb.rds.aliyuncs.com', def get_tamp_nav(fund, start_date, rollback=False, invest_type=2):
# user='tamp_fund',
# password='@imeng408',
# database='tamp_fund',
# charset='utf8',
# use_unicode='True')
def get_nav(fund, start_date, rollback=False, invest_type='public'):
"""获取基金ID为fund, 起始日期为start_date, 终止日期为当前日期的基金净值表 """获取基金ID为fund, 起始日期为start_date, 终止日期为当前日期的基金净值表
Args: Args:
fund[str]:基金ID fund[str]:基金ID
start_date[date]:起始日期 start_date[date]:起始日期
rollback[bool]:当起始日期不在净值公布日历中,是否往前取最近的净值公布日 rollback[bool]:当起始日期不在净值公布日历中,是否往前取最近的净值公布日
public[bool]:是否为公募 invest_type[num]:0:公募 1:私募 2:优选
Returns:df[DataFrame]: 索引为净值公布日, 列为复权净值的净值表; 查询失败则返回None Returns:df[DataFrame]: 索引为净值公布日, 列为复权净值的净值表; 查询失败则返回None
""" """
with TAMP_SQL(tamp_fund_engine) as tamp_product: with TAMP_SQL(tamp_product_engine) as tamp_product, TAMP_SQL(tamp_fund_engine) as tamp_fund:
tamp_product_session = tamp_product.session tamp_product_session = tamp_product.session
if invest_type == 'public': tamp_fund_session = tamp_fund.session
sql = "SELECT ts_code, end_date, adj_nav FROM public_fund_nav " \ # if invest_type == "private":
"WHERE ts_code='{}'".format(fund) # sql = "SELECT fund_id, price_date, cumulative_nav FROM fund_nav " \
cur = tamp_product_session.execute(sql) # "WHERE fund_id='{}'".format(fund)
data = cur.fetchall() # # df = pd.read_sql(sql, con).dropna(how='any')
df = pd.DataFrame(list(data), columns=['fund_id', 'end_date', 'adj_nav']).dropna(how='any') # cur = tamp_product_session.execute(sql)
df.rename({'ts_code': 'fund_id'}, axis=1, inplace=True) if invest_type == 0:
else: sql = """select distinct `id`, `end_date`, `accum_nav` from `public_fund_nav` where `id`='{}' order by `end_date` ASC""".format(
sql = "SELECT fund_id, price_date, cumulative_nav FROM fund_nav " \ fund)
"WHERE fund_id='{}'".format(fund) cur = tamp_fund_session.execute(sql)
# df = pd.read_sql(sql, con).dropna(how='any') elif invest_type == 1:
sql = """select distinct `fund_id`, `price_date`,`cumulative_nav` from `fund_nav` where `fund_id`='{}' order by `price_date` ASC""".format(
fund)
cur = tamp_fund_session.execute(sql)
elif invest_type == 2:
sql = """select distinct `fund_id`,`price_date`,`cumulative_nav` from `fund_nav` where `fund_id`='{}' order by `price_date` ASC""".format(
fund)
cur = tamp_product_session.execute(sql) cur = tamp_product_session.execute(sql)
data = cur.fetchall() data = cur.fetchall()
df = pd.DataFrame(data, columns=['fund_id', 'price_date', 'cumulative_nav']).dropna(how='any') df = pd.DataFrame(data, columns=['fund_id', 'price_date', 'cumulative_nav']).dropna(how='any')
df.rename({'price_date': 'end_date', 'cumulative_nav': 'adj_nav'}, axis=1, inplace=True) df.rename({'price_date': 'end_date', 'cumulative_nav': 'adj_nav'}, axis=1, inplace=True)
if df['adj_nav'].count() == 0:
logging.log(logging.ERROR, "CAN NOT FIND {}".format(fund))
return None
df['end_date'] = pd.to_datetime(df['end_date']) df['end_date'] = pd.to_datetime(df['end_date'])
if rollback and df['end_date'].min() < start_date < df['end_date'].max(): if rollback and df['end_date'].min() < start_date < df['end_date'].max():
...@@ -142,7 +136,7 @@ def get_manager(invest_type): ...@@ -142,7 +136,7 @@ def get_manager(invest_type):
return df return df
def get_fund_info(end_date, invest_type): def get_fund_info(fund, end_date, invest_type):
"""[summary] """[summary]
Args: Args:
...@@ -152,13 +146,13 @@ def get_fund_info(end_date, invest_type): ...@@ -152,13 +146,13 @@ def get_fund_info(end_date, invest_type):
Returns: Returns:
[type]: [description] [type]: [description]
""" """
with TAMP_SQL(tamp_fund_engine) as tamp_product: with TAMP_SQL(tamp_fund_engine) as tamp_fund:
tamp_product_session = tamp_product.session tamp_fund_session = tamp_fund.session
if invest_type == 'public': if invest_type == 'public':
sql = "SELECT ts_code, fund_type, management FROM public_fund_basic " \ sql = "SELECT ts_code, fund_type, management FROM public_fund_basic " \
"WHERE delist_date IS NULL AND (due_date IS NULL OR due_date>'{}')".format(end_date.strftime('%Y%m%d')) "WHERE delist_date IS NULL AND (due_date IS NULL OR due_date>'{}')".format(end_date.strftime('%Y%m%d'))
# df = pd.read_sql(sql, con).dropna(how='all') # df = pd.read_sql(sql, con).dropna(how='all')
cur = tamp_product_session.execute(sql) cur = tamp_fund_session.execute(sql)
data = cur.fetchall() data = cur.fetchall()
df = pd.DataFrame(list(data), columns=['ts_code', 'fund_type', 'management']) df = pd.DataFrame(list(data), columns=['ts_code', 'fund_type', 'management'])
...@@ -167,17 +161,18 @@ def get_fund_info(end_date, invest_type): ...@@ -167,17 +161,18 @@ def get_fund_info(end_date, invest_type):
df.rename({'ts_code': 'fund_id'}, axis=1, inplace=True) df.rename({'ts_code': 'fund_id'}, axis=1, inplace=True)
df = pd.merge(df, manager_info, how="left", on='fund_id') df = pd.merge(df, manager_info, how="left", on='fund_id')
else: else:
sql = "SELECT a.id, a.substrategy, b.fund_manager_id " \
sql = "SELECT id, substrategy FROM fund_info WHERE delete_tag=0 " \ "FROM fund_info as a LEFT JOIN fund_manager_mapping as b " \
"AND substrategy!=-1" "ON a.id = b.fund_id WHERE a.delete_tag=0 " \
cur = tamp_product_session.execute(sql) "AND a.substrategy!=-1 AND a.id='{}'".format(fund)
cur = tamp_fund_session.execute(sql)
data = cur.fetchall() data = cur.fetchall()
df = pd.DataFrame(list(data), columns=['id', 'substrategy']) df = pd.DataFrame(list(data), columns=['fund_id', 'substrategy', 'manager'])
# df = pd.read_sql(sql, con).dropna(how='all') # df = pd.read_sql(sql, con).dropna(how='all')
df.rename({'id': 'fund_id'}, axis=1, inplace=True) # df.rename({'id': 'fund_id'}, axis=1, inplace=True)
manager_info = get_manager(invest_type) # manager_info = get_manager(invest_type)
df = pd.merge(df, manager_info, how="inner", on='fund_id') # df = pd.merge(df, manager_info, how="inner", on='fund_id')
return df return df
...@@ -255,7 +250,7 @@ def metric_rank(df): ...@@ -255,7 +250,7 @@ def metric_rank(df):
return df return df
def fund_rank(start_date, end_date, invest_type='private'): def fund_rank(start_date, end_date, invest_type=1):
fund_info = get_fund_info(end_date, invest_type=invest_type) fund_info = get_fund_info(end_date, invest_type=invest_type)
group = fund_info.groupby('substrategy') group = fund_info.groupby('substrategy')
...@@ -270,7 +265,7 @@ def fund_rank(start_date, end_date, invest_type='private'): ...@@ -270,7 +265,7 @@ def fund_rank(start_date, end_date, invest_type='private'):
for substrategy in grouped_fund.index: for substrategy in grouped_fund.index:
for fund in grouped_fund[substrategy]: for fund in grouped_fund[substrategy]:
df = get_nav(fund, start_date, rollback=False, invest_type=invest_type) df = get_tamp_nav(fund, start_date, rollback=False, invest_type=invest_type)
try: try:
if df.index[-1] - df.index[0] < 0.6 * (end_date - start_date): if df.index[-1] - df.index[0] < 0.6 * (end_date - start_date):
...@@ -278,7 +273,7 @@ def fund_rank(start_date, end_date, invest_type='private'): ...@@ -278,7 +273,7 @@ def fund_rank(start_date, end_date, invest_type='private'):
logging.log(logging.INFO, 'Skipped {}'.format(fund)) logging.log(logging.INFO, 'Skipped {}'.format(fund))
continue continue
n = get_frequency(df) n = get_frequency(df)
except Exception as e: except:
# logging.log(logging.ERROR, repr(e)) # logging.log(logging.ERROR, repr(e))
logging.log(logging.INFO, 'Skipped {}'.format(fund)) logging.log(logging.INFO, 'Skipped {}'.format(fund))
continue continue
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment