宗熹新算法推荐

fccb2110 · 赵杰 · c485365b · fccb2110 · fccb2110
Commit fccb2110 authored Dec 24, 2020 by 赵杰
Show whitespace changes
Inline Side-by-side

Showing with 111 additions and 56 deletions

data_service_v2_1.py app/service/data_service_v2_1.py +3 -0

portfolio_diagnose.py app/service/portfolio_diagnose.py +108 -56

No files found.
--- a/app/service/data_service_v2_1.py
+++ b/app/service/data_service_v2_1.py
@@ -105,6 +105,7 @@ class UserCustomerDataAdaptor:
                    product_df = product_df.drop_duplicates("fund_id")
            user_customer_order_df = order_df.set_index('fund_id').join(product_df.set_index('fund_id')).reset_index()
+            user_customer_order_df["confirm_share_date"] = user_customer_order_df["confirm_share_date"].apply(lambda x: pd.to_datetime(x.date()))
            self.start_date = user_customer_order_df["confirm_share_date"].min()
            self.customer_real_name = user_customer_order_df["customer_name"].values[0]
            self.ifa_real_name = user_customer_order_df["username"].values[0]
@@ -162,6 +163,8 @@ class UserCustomerDataAdaptor:
                data = cur.fetchall()
                cur_fund_performance_df = pd.DataFrame(list(data),
                columns=['price_date', 'ret_1w', 'ret_cum_1m', 'ret_cum_6m', 'ret_cum_1y', 'ret_cum_ytd', 'ret_cum_incep'])
+                cur_fund_performance_df = cur_fund_performance_df[
+                    cur_fund_performance_df["price_date"] <= self.end_date.date()]
                self.all_fund_performance[cur_fund_id] = cur_fund_performance_df
                cur_fund_nav_df["price_date"] = pd.to_datetime(cur_fund_nav_df["price_date"])

--- a/app/service/portfolio_diagnose.py
+++ b/app/service/portfolio_diagnose.py
@@ -124,7 +124,7 @@ def choose_good_evaluation(evaluation):
    if v1[0] > 1:
        del evaluation[1]
-    if v2[0] > 1 and float(v2[1].strip('%')) <= 60:
+    if (v2[0] > 1 and float(v2[1].strip('%')) <= 60) or math.isnan(float(v2[1].strip('%'))):
        del evaluation[2]
    if v3[0] > 1:
        del evaluation[3]
@@ -145,7 +145,7 @@ def choose_bad_evaluation(evaluation):
    if v1[0] < 2:
        del evaluation[1]
-    if v2[0] < 2:
+    if v2[0] < 2 or math.isnan(float(v2[1].strip('%'))):
        del evaluation[2]
    if v3[0] < 2:
        del evaluation[3]
@@ -235,10 +235,10 @@ def get_tamp_fund():
    Returns:
    """
-    with TAMP_SQL(tamp_fund_engine) as tamp_fund:
+    with TAMP_SQL(tamp_product_engine) as tamp_prod:
-        tamp_fund_session = tamp_fund.session
+        tamp_prod_session = tamp_prod.session
-        sql = "SELECT id FROM tamp_fund_info WHERE id LIKE 'HF%'"
+        sql = "SELECT id FROM fund_info WHERE `status` = 1 and strategy!=7"
-        cur = tamp_fund_session.execute(sql)
+        cur = tamp_prod_session.execute(sql)
        data = cur.fetchall()
        # df = pd.read_sql(sql, con)
        df = pd.DataFrame(list(data), columns=['fund_id'])
@@ -408,10 +408,13 @@ class PortfolioDiagnose(object):
        prod = get_tamp_nav(self.portfolio[0], self.start_date, invest_type=self.invest_type)
        fund_info = get_fund_info(self.end_date, invest_type=self.invest_type)
-        # while prod is None or prod.index[-1] - prod.index[0] < 0.6 * (self.end_date - self.start_date):
+        while prod is None or prod.index[-1] - prod.index[0] < 0.6 * (self.end_date - self.start_date):
-        while prod is None:
+        # while prod is None:
            # 获取的净值表为空时首先考虑基金净值数据不足半年，查找同一基金经理下的相同二级策略的基金ID作替换
            result = fund_info[fund_info['fund_id'] == self.portfolio[0]]
+            if result.empty:
+                break
            manager = str(result['manager'].values)
            strategy = result['substrategy'].values
@@ -512,7 +515,7 @@ class PortfolioDiagnose(object):
        # 组合内已包含的策略
        # included_strategy = set()
        # 按每种基金最少投资100w确定组合包含的最大基金数量
-        max_len = len(self.portfolio) - len(prod.columns)
+        max_len = min(len(self.portfolio) - len(prod.columns),  self.invest_amount/1e6)
        # 排名表内包含的所有策略
        # all_strategy = set(fund_rank['substrategy'].to_list())
@@ -527,45 +530,84 @@ class PortfolioDiagnose(object):
        # add_strategy = all_strategy - included_strategy
        add_risk = all_risk - included_risk
-        # 遍历产品池，推荐得分>80且与组合内其他基金相关度低于0.8的属于待添加策略的基金
+        candidate_funds = tamp_fund['fund_id'].to_list()
-        for proposal in tamp_fund['fund_id']:
+        candidate_info = []
+        for proposal in candidate_funds:
            if proposal in fund_rank['fund_id'].to_list() and proposal not in prod.columns:
                proposal_z_score = search_rank(fund_rank, proposal, metric='z_score')
                proposal_strategy = fund_rank[fund_rank['fund_id'] == proposal]['substrategy'].values[0]
-            else:
+                proposal_risk = get_risk_level(proposal_strategy)
-                continue
+                if proposal_z_score >= 60:
+                    candidate_info.append((proposal, proposal_z_score, proposal_risk))
+        candidate_info.sort(key=lambda elem: elem[1], reverse=True)
+        # candidate_high_risk = [i[0] for i in list(filter(lambda x: x[2] == 'H', candidate_info))]
+        # candidate_median_risk = [i[0] for i in list(filter(lambda x: x[2] == 'M', candidate_info))]
+        # candidate_low_risk = [i[0] for i in list(filter(lambda x: x[2] == 'L', candidate_info))]
+        candidate_funds = [i[0] for i in candidate_info]
-            if proposal_z_score > 60 and (get_risk_level(proposal_strategy) in add_risk or not add_risk):
+        # 遍历产品池，推荐得分>80且与组合内其他基金相关度低于0.8的属于待添加策略的基金
-                # if proposal_z_score > 80:
+        # for proposal in candidate_funds:
+        #     proposal_strategy = fund_rank[fund_rank['fund_id'] == proposal]['substrategy'].values[0]
+        #     if get_risk_level(proposal_strategy) in add_risk or not add_risk:
+        #         # if proposal_z_score > 80:
+        #         proposal_nav = get_tamp_nav(proposal, self.start_date, invest_type=self.invest_type)
+        #         # 忽略净值周期大于周更的产品
+        #         # if get_frequency(proposal_nav) <= 52:
+        #         #     continue
+        #
+        #         self.freq_list.append(get_frequency(proposal_nav))
+        #         proposal_nav = rename_col(proposal_nav, proposal)
+        #
+        #         # 按最大周期进行重采样，计算新建组合的相关性
+        #         prod = pd.merge(prod, proposal_nav, how='outer', on='end_date').astype(float)
+        #         prod.sort_index(inplace=True)
+        #         prod.ffill(inplace=True)
+        #         prod = resample(prod, get_trade_cal(), min(self.freq_list))
+        #
+        #         self.new_correlation = cal_correlation(prod)
+        #         judge_correlation = self.new_correlation.fillna(0)
+        #
+        #         if np.all(judge_correlation < 0.8):
+        #             self.proposal_fund.append(proposal)
+        #             max_len -= 1
+        #             # add_strategy -= {proposal_strategy}
+        #             add_risk -= {get_risk_level(proposal_strategy)}
+        #             # if len(add_strategy) == 0 or max_len == 0:
+        #             if max_len == 0:
+        #                 break
+        #         else:
+        #             prod.drop(columns=proposal, inplace=True)
+        # 遍历产品池，推荐得分>80且与组合内其他基金相关度低于0.8的属于待添加策略的基金
+        for proposal in candidate_funds:
            proposal_nav = get_tamp_nav(proposal, self.start_date, invest_type=self.invest_type)
            # 忽略净值周期大于周更的产品
            # if get_frequency(proposal_nav) <= 52:
            #     continue
            self.freq_list.append(get_frequency(proposal_nav))
            proposal_nav = rename_col(proposal_nav, proposal)
            # 按最大周期进行重采样，计算新建组合的相关性
-                prod_with_new_fund = pd.merge(prod, proposal_nav, how='outer', on='end_date').astype(float)
+            temp = pd.merge(prod, proposal_nav, how='outer', on='end_date').astype(float)
-                prod_with_new_fund.sort_index(inplace=True)
+            temp.sort_index(inplace=True)
-                prod_with_new_fund.ffill(inplace=True)
+            temp.ffill(inplace=True)
+            temp = resample(temp, get_trade_cal(), min(self.freq_list))
-                prod_with_new_fund = resample(prod_with_new_fund, get_trade_cal(), min(self.freq_list))
+            self.new_correlation = cal_correlation(temp)
-                self.new_correlation = cal_correlation(prod_with_new_fund)
            judge_correlation = self.new_correlation.fillna(0)
            if np.all(judge_correlation < 0.8):
-                    prod = prod_with_new_fund
                self.proposal_fund.append(proposal)
                max_len -= 1
                # add_strategy -= {proposal_strategy}
                add_risk -= {get_risk_level(proposal_strategy)}
                # if len(add_strategy) == 0 or max_len == 0:
+                prod = temp
                if max_len == 0:
                    break
-                else:
-                    # prod.drop(columns=proposal, inplace=True)
-                    self.freq_list.pop()
        prod.dropna(how='all', inplace=True)
        prod.fillna(method='bfill', inplace=True)
@@ -588,9 +630,9 @@ class PortfolioDiagnose(object):
        print("遍历产品池获取候选推荐时间：", end3 - end2)
        # propose_portfolio.to_csv('test_portfolio.csv', encoding='gbk')
-        mu = expected_returns.mean_historical_return(self.propose_portfolio, frequency=min(self.freq_list))
+        mu = [search_rank(fund_rank, x, 'annual_return') for x in self.propose_portfolio.columns]
        S = risk_models.sample_cov(self.propose_portfolio, frequency=min(self.freq_list))
-        dd = expected_returns.drawdown_from_prices(self.propose_portfolio)
+        dd = [search_rank(fund_rank, x, 'max_drawdown') for x in self.propose_portfolio.columns]
        # if self.client_type == 1:
        # proposal_risk = [[x, get_risk_level(search_rank(fund_rank, x, metric='substrategy'))] for x in
@@ -607,20 +649,30 @@ class PortfolioDiagnose(object):
            propose_risk_mapper[fund] = str(get_risk_level(search_rank(fund_rank, fund, metric='substrategy')))
        if self.client_type == 1:
-            risk_upper = {"L": 0.6, "M": 0.4, "H": 0.0}
+            risk_upper = {"M": 0.4, "H": 0.0}
-            risk_lower = {"L": 0.6, "M": 0.4, "H": 0.0}
+            risk_lower = {"L": 0.6}
+            self.expect_return = 0.12
+            self.expect_drawdown = 0.03
        elif self.client_type == 2:
-            risk_upper = {"L": 0.5, "M": 0.3, "H": 0.2}
+            risk_upper = {"H": 0.2}
-            risk_lower = {"L": 0.5, "M": 0.3, "H": 0.2}
+            risk_lower = {"L": 0.5, "M": 0.3}
+            self.expect_return = 0.15
+            self.expect_drawdown = 0.05
        elif self.client_type == 3:
-            risk_upper = {"L": 0.3, "M": 0.5, "H": 0.2}
+            risk_upper = {"L": 0.3, "H": 0.3}
-            risk_lower = {"L": 0.3, "M": 0.5, "H": 0.2}
+            risk_lower = {"M": 0.4}
+            self.expect_return = 0.18
+            self.expect_drawdown = 0.08
        elif self.client_type == 4:
-            risk_upper = {"L": 0.3, "M": 0.4, "H": 0.3}
+            risk_upper = {"L": 0.2, "M": 0.4}
-            risk_lower = {"L": 0.3, "M": 0.4, "H": 0.3}
+            risk_lower = {"H": 0.4}
+            self.expect_return = 0.15
+            self.expect_drawdown = 0.20
        elif self.client_type == 5:
-            risk_upper = {"L": 0.0, "M": 0.5, "H": 0.5}
+            risk_upper = {"L": 0.0, "M": 0.4}
-            risk_lower = {"L": 0.0, "M": 0.5, "H": 0.5}
+            risk_lower = {"H": 0.6}
+            self.expect_return = 0.25
+            self.expect_drawdown = 0.15
        else:
            risk_upper = {"H": 1.0}
            risk_lower = {"L": 0.0}