舍弃fund_info全加载

bd8b72f3 · 李宗熹 · 946057e1 · bd8b72f3 · bd8b72f3
Commit bd8b72f3 authored Jan 22, 2021 by 李宗熹
Show whitespace changes
Inline Side-by-side

Showing with 102 additions and 60 deletions

portfolio_diagnose.py app/service/portfolio_diagnose.py +62 -15

fund_rank.py app/utils/fund_rank.py +40 -45

No files found.
--- a/app/service/portfolio_diagnose.py
+++ b/app/service/portfolio_diagnose.py
@@ -299,6 +299,53 @@ def get_tamp_nav(fund, start_date, rollback=False, invest_type=2):
    return df
+def get_nav(fund, start_date, rollback=False, invest_type='private'):
+    """Fetch the net-asset-value (NAV) table of fund `fund` from start_date onward.
+
+    No upper date bound is applied, so the table runs up to the latest stored record.
+
+    Args:
+        fund[str]: fund ID
+        start_date[date]: start date
+        rollback[bool]: when start_date is not a NAV publication date, whether to step
+            back to the nearest earlier publication date
+        invest_type[str]: 'public' reads the public_fund_nav table; any other value
+            reads the fund_nav table
+    Returns:df[DataFrame]: indexed by NAV publication date (end_date, ascending) with
+        columns fund_id and adj_nav; None when no NAV rows are found
+    """
+    with TAMP_SQL(tamp_fund_engine) as tamp_product:
+        tamp_product_session = tamp_product.session
+        # NOTE(review): `fund` is interpolated directly into the SQL text in both branches;
+        # confirm it never carries untrusted input, or switch to bound parameters.
+        if invest_type == 'public':
+            sql = "SELECT ts_code, end_date, adj_nav FROM public_fund_nav " \
+                  "WHERE ts_code='{}'".format(fund)
+            cur = tamp_product_session.execute(sql)
+            data = cur.fetchall()
+            df = pd.DataFrame(list(data), columns=['fund_id', 'end_date', 'adj_nav']).dropna(how='any')
+            # The frame above is already built with a 'fund_id' column, so this rename matches nothing.
+            df.rename({'ts_code': 'fund_id'}, axis=1, inplace=True)
+        else:
+            sql = "SELECT fund_id, price_date, cumulative_nav FROM fund_nav " \
+                  "WHERE fund_id='{}'".format(fund)
+            # df = pd.read_sql(sql, con).dropna(how='any')
+            cur = tamp_product_session.execute(sql)
+            data = cur.fetchall()
+            df = pd.DataFrame(data, columns=['fund_id', 'price_date', 'cumulative_nav']).dropna(how='any')
+            # Normalise the column names to the ones used by the public branch.
+            df.rename({'price_date': 'end_date', 'cumulative_nav': 'adj_nav'}, axis=1, inplace=True)
+        # No usable NAV rows: return None rather than an empty frame.
+        if df['adj_nav'].count() == 0:
+            # logging.log(logging.ERROR, "CAN NOT FIND {}".format(fund))
+            return None
+        df['end_date'] = pd.to_datetime(df['end_date'])
+        # Roll back only when start_date lies strictly inside the available date range, then
+        # walk backwards one day at a time until a publication date is hit.
+        # NOTE(review): termination relies on start_date comparing equal to one of the pandas
+        # Timestamps in end_date (e.g. a midnight datetime) — confirm against callers.
+        if rollback and df['end_date'].min() < start_date < df['end_date'].max():
+            while start_date not in list(df['end_date']):
+                start_date -= datetime.timedelta(days=1)
+        df = df[df['end_date'] >= start_date]
+        # Keep the first row for each publication date, then index and sort by date.
+        # NOTE(review): the inplace calls below operate on a filtered frame and may emit
+        # pandas' SettingWithCopyWarning — verify.
+        df.drop_duplicates(subset='end_date', inplace=True, keep='first')
+        df.set_index('end_date', inplace=True)
+        df.sort_index(inplace=True, ascending=True)
+        return df
 def get_risk_level(substrategy):
    """获取风险类型
@@ -439,19 +486,20 @@ class PortfolioDiagnose(object):
        """
        # 获取原始投资组合的第一支基金的净值表
        prod = get_tamp_nav(self.portfolio[0], self.start_date, invest_type=self.portfolio_dict[self.portfolio[0]])
-        fund_info = get_fund_info(self.end_date, invest_type=self.invest_type)
+        # fund_info = get_fund_info(self.end_date, invest_type=self.invest_type)
        # while prod is None or prod.index[-1] - prod.index[0] < 0.6 * (self.end_date - self.start_date):
        while prod is None:
            # 获取的净值表为空时首先考虑基金净值数据不足半年，查找同一基金经理下的相同二级策略的基金ID作替换
+            fund_info = get_fund_info(self.portfolio[0], self.end_date, self.invest_type)
            result = fund_info[fund_info['fund_id'] == self.portfolio[0]]
            if result.empty:
                break
            manager = str(result['manager'].values)
-            strategy = result['substrategy'].values
+            strategy = result['substrategy'].values[0]
+            print('基金id：', self.portfolio[0], '基金经理: ', manager, '策略： ', strategy)
            replaced_fund = replace_fund(manager, strategy, fund_rank)
+            print('替换基金：', replaced_fund)
            if replaced_fund:
                # 替换基金数据非空则记录替换的基金对
@@ -604,7 +652,6 @@ class PortfolioDiagnose(object):
        return prod
    def optimize(self, ):
        import time
        start = time.time()
@@ -672,7 +719,7 @@ class PortfolioDiagnose(object):
        for i in range(1, max_len):
            proposal_fund_combinations = list(combinations(candidate_funds, r=i))
            for proposal_funds in proposal_fund_combinations:
-                drop_funds = list(set(candidate_funds) - set(proposal_funds))
+                drop_funds = list(set(candidate_funds) - set(proposal_funds) - set(self.replace_pair.values()))
                temp = prod.drop(columns=drop_funds, axis=1)
                mu = [search_rank(fund_rank, x, 'annual_return') for x in temp.columns]
@@ -1310,15 +1357,15 @@ class PortfolioDiagnose(object):
            compare_data.append(com_data)
        return compare_data
-# portfolio = ['HF00002JJ2', 'HF00005DBQ', 'HF0000681Q', 'HF00006693', 'HF00006AZF', 'HF00006BGS']
+# portfolio = {'HF00002JJ2':2, 'HF00005DBQ':2, 'HF0000681Q':2, 'HF00006693':2, 'HF00006AZF':2, 'HF00006BGS':2}
 # portfolio_diagnose = PortfolioDiagnose(client_type=1, portfolio=portfolio, invest_amount=10000000)
 # portfolio_diagnose.optimize()
 # if __name__ == '__main__':
-    # print(portfolio_diagnose.single_fund_radar())
+#     print(portfolio_diagnose.single_fund_radar())
-    # print(portfolio_diagnose.propose_fund_radar())
+#     print(portfolio_diagnose.propose_fund_radar())
-    # print(portfolio_diagnose.old_portfolio_evaluation())
+#     print(portfolio_diagnose.old_portfolio_evaluation())
-    # print('旧组合相关性：', portfolio_diagnose.old_correlation)
+#     print('旧组合相关性：', portfolio_diagnose.old_correlation)
-    # print('新组合相关性：', portfolio_diagnose.new_correlation)
+#     print('新组合相关性：', portfolio_diagnose.new_correlation)
-    # print('旧组合个基评价：', portfolio_diagnose.old_portfolio_evaluation())
+#     print('旧组合个基评价：', portfolio_diagnose.old_portfolio_evaluation())
-    # print('新组合个基评价：', portfolio_diagnose.propose_fund_evaluation())
+#     print('新组合个基评价：', portfolio_diagnose.propose_fund_evaluation())
-    # print(portfolio_diagnose.single_evaluation(fund_id='HF0000681Q'))
+#     print(portfolio_diagnose.single_evaluation(fund_id='HF0000681Q'))
\ No newline at end of file
--- a/app/utils/fund_rank.py
+++ b/app/utils/fund_rank.py
@@ -12,48 +12,42 @@ from app.api.engine import tamp_fund_engine, TAMP_SQL, tamp_product_engine
 from app.utils.week_evaluation import *
-# con = pymysql.connect(host='tamper.mysql.polardb.rds.aliyuncs.com',
+def get_tamp_nav(fund, start_date, rollback=False, invest_type=2):
-#                       user='tamp_fund',
-#                       password='@imeng408',
-#                       database='tamp_fund',
-#                       charset='utf8',
-#                       use_unicode='True')
-def get_nav(fund, start_date, rollback=False, invest_type='public'):
    """获取基金ID为fund, 起始日期为start_date, 终止日期为当前日期的基金净值表
    Args:
        fund[str]:基金ID
        start_date[date]:起始日期
        rollback[bool]:当起始日期不在净值公布日历中，是否往前取最近的净值公布日
-        public[bool]:是否为公募
+        invest_type[num]:0:公募 1:私募 2:优选
    Returns:df[DataFrame]: 索引为净值公布日， 列为复权净值的净值表; 查询失败则返回None
    """
-    with TAMP_SQL(tamp_fund_engine) as tamp_product:
+    with TAMP_SQL(tamp_product_engine) as tamp_product, TAMP_SQL(tamp_fund_engine) as tamp_fund:
        tamp_product_session = tamp_product.session
-        if invest_type == 'public':
+        tamp_fund_session = tamp_fund.session
-            sql = "SELECT ts_code, end_date, adj_nav FROM public_fund_nav " \
+        # if invest_type == "private":
-                  "WHERE ts_code='{}'".format(fund)
+        #     sql = "SELECT fund_id, price_date, cumulative_nav FROM fund_nav " \
-            cur = tamp_product_session.execute(sql)
+        #           "WHERE fund_id='{}'".format(fund)
-            data = cur.fetchall()
+        #     # df = pd.read_sql(sql, con).dropna(how='any')
-            df = pd.DataFrame(list(data), columns=['fund_id', 'end_date', 'adj_nav']).dropna(how='any')
+        #     cur = tamp_product_session.execute(sql)
-            df.rename({'ts_code': 'fund_id'}, axis=1, inplace=True)
+        if invest_type == 0:
-        else:
+            sql = """select distinct `id`, `end_date`, `accum_nav` from `public_fund_nav` where `id`='{}'  order by `end_date` ASC""".format(
-            sql = "SELECT fund_id, price_date, cumulative_nav FROM fund_nav " \
+                fund)
-                  "WHERE fund_id='{}'".format(fund)
+            cur = tamp_fund_session.execute(sql)
-            # df = pd.read_sql(sql, con).dropna(how='any')
+        elif invest_type == 1:
+            sql = """select distinct `fund_id`, `price_date`,`cumulative_nav` from `fund_nav` where `fund_id`='{}'  order by `price_date` ASC""".format(
+                fund)
+            cur = tamp_fund_session.execute(sql)
+        elif invest_type == 2:
+            sql = """select distinct `fund_id`,`price_date`,`cumulative_nav` from `fund_nav` where `fund_id`='{}'  order by `price_date` ASC""".format(
+                fund)
            cur = tamp_product_session.execute(sql)
        data = cur.fetchall()
        df = pd.DataFrame(data, columns=['fund_id', 'price_date', 'cumulative_nav']).dropna(how='any')
        df.rename({'price_date': 'end_date', 'cumulative_nav': 'adj_nav'}, axis=1, inplace=True)
-        if df['adj_nav'].count() == 0:
-            logging.log(logging.ERROR, "CAN NOT FIND {}".format(fund))
-            return None
        df['end_date'] = pd.to_datetime(df['end_date'])
        if rollback and df['end_date'].min() < start_date < df['end_date'].max():
@@ -142,7 +136,7 @@ def get_manager(invest_type):
        return df
-def get_fund_info(end_date, invest_type):
+def get_fund_info(fund, end_date, invest_type):
    """[summary]
    Args:
@@ -152,13 +146,13 @@ def get_fund_info(end_date, invest_type):
    Returns:
        [type]: [description]
    """
-    with TAMP_SQL(tamp_fund_engine) as tamp_product:
+    with TAMP_SQL(tamp_fund_engine) as tamp_fund:
-        tamp_product_session = tamp_product.session
+        tamp_fund_session = tamp_fund.session
        if invest_type == 'public':
            sql = "SELECT ts_code, fund_type, management FROM public_fund_basic " \
                  "WHERE delist_date IS NULL AND (due_date IS NULL OR due_date>'{}')".format(end_date.strftime('%Y%m%d'))
            # df = pd.read_sql(sql, con).dropna(how='all')
-            cur = tamp_product_session.execute(sql)
+            cur = tamp_fund_session.execute(sql)
            data = cur.fetchall()
            df = pd.DataFrame(list(data), columns=['ts_code', 'fund_type', 'management'])
@@ -167,17 +161,18 @@ def get_fund_info(end_date, invest_type):
            df.rename({'ts_code': 'fund_id'}, axis=1, inplace=True)
            df = pd.merge(df, manager_info, how="left", on='fund_id')
        else:
+            sql = "SELECT a.id, a.substrategy, b.fund_manager_id " \
-            sql = "SELECT id, substrategy FROM fund_info WHERE delete_tag=0 " \
+                  "FROM fund_info as a LEFT JOIN fund_manager_mapping as b " \
-                  "AND substrategy!=-1"
+                  "ON a.id = b.fund_id WHERE a.delete_tag=0 " \
-            cur = tamp_product_session.execute(sql)
+                  "AND a.substrategy!=-1 AND a.id='{}'".format(fund)
+            cur = tamp_fund_session.execute(sql)
            data = cur.fetchall()
-            df = pd.DataFrame(list(data), columns=['id', 'substrategy'])
+            df = pd.DataFrame(list(data), columns=['fund_id', 'substrategy', 'manager'])
            # df = pd.read_sql(sql, con).dropna(how='all')
-            df.rename({'id': 'fund_id'}, axis=1, inplace=True)
+            # df.rename({'id': 'fund_id'}, axis=1, inplace=True)
-            manager_info = get_manager(invest_type)
+            # manager_info = get_manager(invest_type)
-            df = pd.merge(df, manager_info, how="inner", on='fund_id')
+            # df = pd.merge(df, manager_info, how="inner", on='fund_id')
        return df
@@ -255,7 +250,7 @@ def metric_rank(df):
    return df
-def fund_rank(start_date, end_date, invest_type='private'):
+def fund_rank(start_date, end_date, invest_type=1):
    fund_info = get_fund_info(end_date, invest_type=invest_type)
    group = fund_info.groupby('substrategy')
@@ -270,7 +265,7 @@ def fund_rank(start_date, end_date, invest_type='private'):
    for substrategy in grouped_fund.index:
        for fund in grouped_fund[substrategy]:
-            df = get_nav(fund, start_date, rollback=False, invest_type=invest_type)
+            df = get_tamp_nav(fund, start_date, rollback=False, invest_type=invest_type)
            try:
                if df.index[-1] - df.index[0] < 0.6 * (end_date - start_date):
@@ -278,7 +273,7 @@ def fund_rank(start_date, end_date, invest_type='private'):
                    logging.log(logging.INFO, 'Skipped {}'.format(fund))
                    continue
                n = get_frequency(df)
-            except Exception as e:
+            except:
                # logging.log(logging.ERROR, repr(e))
                logging.log(logging.INFO, 'Skipped {}'.format(fund))
                continue