Commit bd8b72f3 authored by 李宗熹

舍弃fund_info全加载

parent 946057e1
......@@ -299,6 +299,53 @@ def get_tamp_nav(fund, start_date, rollback=False, invest_type=2):
return df
def get_nav(fund, start_date, rollback=False, invest_type='private'):
    """Return the net-asset-value table of one fund from ``start_date`` onwards.

    Args:
        fund (str): Fund ID. Matched against ``ts_code`` when ``invest_type``
            is ``'public'`` and against ``fund_id`` otherwise.
        start_date: First date to keep. Must be comparable with pandas
            timestamps (presumably a ``datetime.datetime``; verify against
            callers).
        rollback (bool): When ``start_date`` lies strictly between the first
            and last published dates but is not itself a published date,
            move it back to the closest earlier published date.
        invest_type (str): ``'public'`` reads ``public_fund_nav``; any other
            value reads ``fund_nav``.

    Returns:
        DataFrame | None: Table indexed by ``end_date`` (ascending, one row
        per date) with columns ``fund_id`` and ``adj_nav``; ``None`` when the
        query yields no complete rows.
    """
    # NOTE(review): the fund ID is interpolated straight into the SQL text.
    # If it can come from a request, switch to bound parameters once the
    # session API exposed by TAMP_SQL is confirmed.
    if invest_type == 'public':
        sql = "SELECT ts_code, end_date, adj_nav FROM public_fund_nav " \
              "WHERE ts_code='{}'".format(fund)
    else:
        sql = "SELECT fund_id, price_date, cumulative_nav FROM fund_nav " \
              "WHERE fund_id='{}'".format(fund)

    with TAMP_SQL(tamp_fund_engine) as tamp_product:
        rows = tamp_product.session.execute(sql).fetchall()

    # Both queries return (id, date, nav) in that order, so the unified
    # column names can be assigned directly instead of renaming afterwards.
    df = pd.DataFrame(list(rows), columns=['fund_id', 'end_date', 'adj_nav']).dropna(how='any')
    if df.empty:
        return None

    df['end_date'] = pd.to_datetime(df['end_date'])

    if rollback and df['end_date'].min() < start_date < df['end_date'].max():
        # Latest published date not after start_date. The guard above
        # guarantees at least one such date exists, so this replaces the
        # day-by-day backwards scan with a single pass.
        start_date = df.loc[df['end_date'] <= start_date, 'end_date'].max()

    # Build the result without in-place mutation of a filtered slice, which
    # would otherwise trigger pandas chained-assignment warnings.
    df = df[df['end_date'] >= start_date]
    df = df.drop_duplicates(subset='end_date', keep='first')
    return df.set_index('end_date').sort_index(ascending=True)
def get_risk_level(substrategy):
"""获取风险类型
......@@ -439,19 +486,20 @@ class PortfolioDiagnose(object):
"""
# 获取原始投资组合的第一支基金的净值表
prod = get_tamp_nav(self.portfolio[0], self.start_date, invest_type=self.portfolio_dict[self.portfolio[0]])
fund_info = get_fund_info(self.end_date, invest_type=self.invest_type)
# fund_info = get_fund_info(self.end_date, invest_type=self.invest_type)
# while prod is None or prod.index[-1] - prod.index[0] < 0.6 * (self.end_date - self.start_date):
while prod is None:
# 获取的净值表为空时首先考虑基金净值数据不足半年,查找同一基金经理下的相同二级策略的基金ID作替换
fund_info = get_fund_info(self.portfolio[0], self.end_date, self.invest_type)
result = fund_info[fund_info['fund_id'] == self.portfolio[0]]
if result.empty:
break
manager = str(result['manager'].values)
strategy = result['substrategy'].values
strategy = result['substrategy'].values[0]
print('基金id:', self.portfolio[0], '基金经理: ', manager, '策略: ', strategy)
replaced_fund = replace_fund(manager, strategy, fund_rank)
print('替换基金:', replaced_fund)
if replaced_fund:
# 替换基金数据非空则记录替换的基金对
......@@ -604,7 +652,6 @@ class PortfolioDiagnose(object):
return prod
def optimize(self, ):
import time
start = time.time()
......@@ -672,7 +719,7 @@ class PortfolioDiagnose(object):
for i in range(1, max_len):
proposal_fund_combinations = list(combinations(candidate_funds, r=i))
for proposal_funds in proposal_fund_combinations:
drop_funds = list(set(candidate_funds) - set(proposal_funds))
drop_funds = list(set(candidate_funds) - set(proposal_funds) - set(self.replace_pair.values()))
temp = prod.drop(columns=drop_funds, axis=1)
mu = [search_rank(fund_rank, x, 'annual_return') for x in temp.columns]
......@@ -1310,15 +1357,15 @@ class PortfolioDiagnose(object):
compare_data.append(com_data)
return compare_data
# portfolio = ['HF00002JJ2', 'HF00005DBQ', 'HF0000681Q', 'HF00006693', 'HF00006AZF', 'HF00006BGS']
# portfolio = {'HF00002JJ2':2, 'HF00005DBQ':2, 'HF0000681Q':2, 'HF00006693':2, 'HF00006AZF':2, 'HF00006BGS':2}
# portfolio_diagnose = PortfolioDiagnose(client_type=1, portfolio=portfolio, invest_amount=10000000)
# portfolio_diagnose.optimize()
# if __name__ == '__main__':
# print(portfolio_diagnose.single_fund_radar())
# print(portfolio_diagnose.propose_fund_radar())
# print(portfolio_diagnose.old_portfolio_evaluation())
# print('旧组合相关性:', portfolio_diagnose.old_correlation)
# print('新组合相关性:', portfolio_diagnose.new_correlation)
# print('旧组合个基评价:', portfolio_diagnose.old_portfolio_evaluation())
# print('新组合个基评价:', portfolio_diagnose.propose_fund_evaluation())
# print(portfolio_diagnose.single_evaluation(fund_id='HF0000681Q'))
\ No newline at end of file
# print(portfolio_diagnose.single_fund_radar())
# print(portfolio_diagnose.propose_fund_radar())
# print(portfolio_diagnose.old_portfolio_evaluation())
# print('旧组合相关性:', portfolio_diagnose.old_correlation)
# print('新组合相关性:', portfolio_diagnose.new_correlation)
# print('旧组合个基评价:', portfolio_diagnose.old_portfolio_evaluation())
# print('新组合个基评价:', portfolio_diagnose.propose_fund_evaluation())
# print(portfolio_diagnose.single_evaluation(fund_id='HF0000681Q'))
\ No newline at end of file
......@@ -12,48 +12,42 @@ from app.api.engine import tamp_fund_engine, TAMP_SQL, tamp_product_engine
from app.utils.week_evaluation import *
# con = pymysql.connect(host='tamper.mysql.polardb.rds.aliyuncs.com',
# user='tamp_fund',
# password='@imeng408',
# database='tamp_fund',
# charset='utf8',
# use_unicode='True')
def get_nav(fund, start_date, rollback=False, invest_type='public'):
def get_tamp_nav(fund, start_date, rollback=False, invest_type=2):
"""获取基金ID为fund, 起始日期为start_date, 终止日期为当前日期的基金净值表
Args:
fund[str]:基金ID
start_date[date]:起始日期
rollback[bool]:当起始日期不在净值公布日历中,是否往前取最近的净值公布日
public[bool]:是否为公募
invest_type[num]:0:公募 1:私募 2:优选
Returns:df[DataFrame]: 索引为净值公布日, 列为复权净值的净值表; 查询失败则返回None
"""
with TAMP_SQL(tamp_fund_engine) as tamp_product:
with TAMP_SQL(tamp_product_engine) as tamp_product, TAMP_SQL(tamp_fund_engine) as tamp_fund:
tamp_product_session = tamp_product.session
if invest_type == 'public':
sql = "SELECT ts_code, end_date, adj_nav FROM public_fund_nav " \
"WHERE ts_code='{}'".format(fund)
cur = tamp_product_session.execute(sql)
data = cur.fetchall()
df = pd.DataFrame(list(data), columns=['fund_id', 'end_date', 'adj_nav']).dropna(how='any')
df.rename({'ts_code': 'fund_id'}, axis=1, inplace=True)
else:
sql = "SELECT fund_id, price_date, cumulative_nav FROM fund_nav " \
"WHERE fund_id='{}'".format(fund)
# df = pd.read_sql(sql, con).dropna(how='any')
tamp_fund_session = tamp_fund.session
# if invest_type == "private":
# sql = "SELECT fund_id, price_date, cumulative_nav FROM fund_nav " \
# "WHERE fund_id='{}'".format(fund)
# # df = pd.read_sql(sql, con).dropna(how='any')
# cur = tamp_product_session.execute(sql)
if invest_type == 0:
sql = """select distinct `id`, `end_date`, `accum_nav` from `public_fund_nav` where `id`='{}' order by `end_date` ASC""".format(
fund)
cur = tamp_fund_session.execute(sql)
elif invest_type == 1:
sql = """select distinct `fund_id`, `price_date`,`cumulative_nav` from `fund_nav` where `fund_id`='{}' order by `price_date` ASC""".format(
fund)
cur = tamp_fund_session.execute(sql)
elif invest_type == 2:
sql = """select distinct `fund_id`,`price_date`,`cumulative_nav` from `fund_nav` where `fund_id`='{}' order by `price_date` ASC""".format(
fund)
cur = tamp_product_session.execute(sql)
data = cur.fetchall()
df = pd.DataFrame(data, columns=['fund_id', 'price_date', 'cumulative_nav']).dropna(how='any')
df.rename({'price_date': 'end_date', 'cumulative_nav': 'adj_nav'}, axis=1, inplace=True)
if df['adj_nav'].count() == 0:
logging.log(logging.ERROR, "CAN NOT FIND {}".format(fund))
return None
df['end_date'] = pd.to_datetime(df['end_date'])
if rollback and df['end_date'].min() < start_date < df['end_date'].max():
......@@ -142,7 +136,7 @@ def get_manager(invest_type):
return df
def get_fund_info(end_date, invest_type):
def get_fund_info(fund, end_date, invest_type):
"""[summary]
Args:
......@@ -152,13 +146,13 @@ def get_fund_info(end_date, invest_type):
Returns:
[type]: [description]
"""
with TAMP_SQL(tamp_fund_engine) as tamp_product:
tamp_product_session = tamp_product.session
with TAMP_SQL(tamp_fund_engine) as tamp_fund:
tamp_fund_session = tamp_fund.session
if invest_type == 'public':
sql = "SELECT ts_code, fund_type, management FROM public_fund_basic " \
"WHERE delist_date IS NULL AND (due_date IS NULL OR due_date>'{}')".format(end_date.strftime('%Y%m%d'))
# df = pd.read_sql(sql, con).dropna(how='all')
cur = tamp_product_session.execute(sql)
cur = tamp_fund_session.execute(sql)
data = cur.fetchall()
df = pd.DataFrame(list(data), columns=['ts_code', 'fund_type', 'management'])
......@@ -167,17 +161,18 @@ def get_fund_info(end_date, invest_type):
df.rename({'ts_code': 'fund_id'}, axis=1, inplace=True)
df = pd.merge(df, manager_info, how="left", on='fund_id')
else:
sql = "SELECT id, substrategy FROM fund_info WHERE delete_tag=0 " \
"AND substrategy!=-1"
cur = tamp_product_session.execute(sql)
sql = "SELECT a.id, a.substrategy, b.fund_manager_id " \
"FROM fund_info as a LEFT JOIN fund_manager_mapping as b " \
"ON a.id = b.fund_id WHERE a.delete_tag=0 " \
"AND a.substrategy!=-1 AND a.id='{}'".format(fund)
cur = tamp_fund_session.execute(sql)
data = cur.fetchall()
df = pd.DataFrame(list(data), columns=['id', 'substrategy'])
df = pd.DataFrame(list(data), columns=['fund_id', 'substrategy', 'manager'])
# df = pd.read_sql(sql, con).dropna(how='all')
df.rename({'id': 'fund_id'}, axis=1, inplace=True)
manager_info = get_manager(invest_type)
df = pd.merge(df, manager_info, how="inner", on='fund_id')
# df.rename({'id': 'fund_id'}, axis=1, inplace=True)
# manager_info = get_manager(invest_type)
# df = pd.merge(df, manager_info, how="inner", on='fund_id')
return df
......@@ -255,7 +250,7 @@ def metric_rank(df):
return df
def fund_rank(start_date, end_date, invest_type='private'):
def fund_rank(start_date, end_date, invest_type=1):
fund_info = get_fund_info(end_date, invest_type=invest_type)
group = fund_info.groupby('substrategy')
......@@ -270,7 +265,7 @@ def fund_rank(start_date, end_date, invest_type='private'):
for substrategy in grouped_fund.index:
for fund in grouped_fund[substrategy]:
df = get_nav(fund, start_date, rollback=False, invest_type=invest_type)
df = get_tamp_nav(fund, start_date, rollback=False, invest_type=invest_type)
try:
if df.index[-1] - df.index[0] < 0.6 * (end_date - start_date):
......@@ -278,7 +273,7 @@ def fund_rank(start_date, end_date, invest_type='private'):
logging.log(logging.INFO, 'Skipped {}'.format(fund))
continue
n = get_frequency(df)
except Exception as e:
except:
# logging.log(logging.ERROR, repr(e))
logging.log(logging.INFO, 'Skipped {}'.format(fund))
continue
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment