data_service.py 15 KB
Newer Older
赵杰's avatar
赵杰 committed
1 2 3 4 5 6 7 8 9 10 11
#!/usr/bin/python3.6
# -*- coding: utf-8 -*-
# @Time    : 2020/11/18 19:12
# @Author  : Jie. Z
# @Email   : zhaojiestudy@163.com
# @File    : data_service.py
# @Software: PyCharm

import pandas as pd
import numpy as np
from sqlalchemy import and_
赵杰's avatar
赵杰 committed
12 13
import tushare as ts
import datetime
赵杰's avatar
赵杰 committed
14
from app.api.engine import tamp_user_session, tamp_product_session
15 16
# from app.model.tamp_user_models import CustomerOrder, CustomerInfo
# from app.model.tamp_product_models import FundInfo
赵杰's avatar
赵杰 committed
17 18 19 20 21


class UserCustomerDataAdaptor:
    user_id = ""
    customer_id = ""
22
    month_date = ""
赵杰's avatar
赵杰 committed
23
    end_date = ""
赵杰's avatar
赵杰 committed
24
    group_data = {}
25 26 27
    trade_cal = None
    all_fund_distribution = {}
    all_fund_performance = {}
赵杰's avatar
赵杰 committed
28

29
    def __init__(self, user_id, customer_id, end_date=str(datetime.date.today()), index_id="IN0000007M"):
赵杰's avatar
赵杰 committed
30 31
        self.user_id = user_id
        self.customer_id = customer_id
32
        self.compare_index_id = index_id
赵杰's avatar
赵杰 committed
33 34
        p_end_date = pd.to_datetime(end_date).date()
        p_end_date = datetime.date(year=p_end_date.year, month=p_end_date.month, day=1) - datetime.timedelta(days=1)
35 36 37 38
        # self.end_date = pd.to_datetime(str(p_end_date))
        self.end_date = pd.to_datetime("2020-11-23")
        p_start_date = datetime.date(year=p_end_date.year, month=p_end_date.month, day=1)
        self.month_start_date = pd.to_datetime("2020-11-16")
赵杰's avatar
赵杰 committed
39
        self.user_customer_order_df = self.get_user_customer_order_data()
40
        self.fund_nav_total, self.fund_cnav_total = self.get_customer_fund_nav_data()
赵杰's avatar
赵杰 committed
41
        self.get_customer_index_nav_data()
42
        self.total_customer_order_cnav_df = self.total_combine_data()
赵杰's avatar
赵杰 committed
43 44 45 46 47 48 49 50 51 52 53 54 55
        self.group_operate()

    @staticmethod
    def get_trade_cal(start_date, end_date):
        ts.set_token('ac1f734f8a25651aa07319ca35b1b0c0854e361e306fe85d85e092bc')
        pro = ts.pro_api()
        if end_date is not None:
            df = pro.trade_cal(exchange='SSE', start_date=start_date, end_date=end_date, is_open='1')
        else:
            df = pro.trade_cal(exchange='SSE', start_date=start_date, is_open='1')
        df.drop(['exchange', 'is_open'], axis=1, inplace=True)
        df.rename(columns={'cal_date': 'end_date'}, inplace=True)
        df["datetime"] = df["end_date"].apply(lambda x: datetime.datetime.strptime(x, "%Y%m%d"))
56

赵杰's avatar
赵杰 committed
57
        return df
赵杰's avatar
赵杰 committed
58 59 60

    # 获取理财师下该用户所有订单列表
    def get_user_customer_order_data(self):
61 62
        # data1 = tamp_user_session.query(CustomerOrder)\
        #         #     .filter(user_id = self.user_id).all()
赵杰's avatar
赵杰 committed
63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78
        # data2 = tamp_user_session.query(t_customer_info).all()
        # data3 = tamp_product_session.query(t_fund_info).all()

        sql_user = """select f2.realname,f3.customer_name,fund_id,f1.order_type,f1.pay_date,f1.subscription_fee,f1.confirm_share_date,f1.confirm_share,f1.confirm_amount,f1.nav,f1.folio_name from customer_order f1, user_info f2,customer_info f3   where f2.id=f1.user_id and f3.id=f1.customer_id and f1.user_id='{}' and f1.customer_id='{}'""".format(self.user_id, self.customer_id)
        cur = tamp_user_session.execute(sql_user)
        data = cur.fetchall()
        order_df = pd.DataFrame(list(data), columns=['username', 'customer_name', 'fund_id', 'order_type', 'pay_date',
                                                     'subscription_fee', 'confirm_share_date', 'confirm_share',
                                                     'confirm_amount', 'nav', 'folio_name'])

        sql_product = "select distinct `id`, `fund_short_name`, `nav_frequency`, `substrategy` from `fund_info`"
        cur = tamp_product_session.execute(sql_product)
        data = cur.fetchall()
        product_df = pd.DataFrame(list(data), columns=['fund_id', 'fund_name', 'freq', 'substrategy'])

        user_customer_order_df = order_df.set_index('fund_id').join(product_df.set_index('fund_id')).reset_index()
赵杰's avatar
赵杰 committed
79
        self.start_date = user_customer_order_df["confirm_share_date"].min()
赵杰's avatar
赵杰 committed
80 81 82 83
        return user_customer_order_df

    # 获取客户持有的基金净值数据
    def get_customer_fund_nav_data(self):
赵杰's avatar
赵杰 committed
84 85
        now_date = datetime.datetime.now().strftime("%Y%m%d")
        trade_date_df = self.get_trade_cal("20000101", now_date)
86
        self.trade_cal = trade_date_df
赵杰's avatar
赵杰 committed
87
        all_fund_nav = pd.DataFrame(index=trade_date_df["datetime"])
88
        all_fund_cnav = pd.DataFrame(index=trade_date_df["datetime"])
赵杰's avatar
赵杰 committed
89 90 91 92 93 94 95 96

        for cur_fund_id in self.user_customer_order_df["fund_id"].unique():
            # 对应基金净值
            sql = """select distinct `price_date`, `nav`,`cumulative_nav` from `fund_nav` where `fund_id`='{}'  order by `price_date` ASC""".format(cur_fund_id)
            cur = tamp_product_session.execute(sql)
            data = cur.fetchall()
            cur_fund_nav_df = pd.DataFrame(list(data), columns=['price_date', 'nav', 'cnav'])

赵杰's avatar
赵杰 committed
97
            # # 对应基金分红
98 99 100 101 102 103 104 105 106 107 108 109 110 111 112
            sql = """select distinct `distribute_date`, `distribution` from `fund_distribution` where `fund_id`='{}' and `distribute_type`='1' order by `distribute_date` ASC""".format(
                cur_fund_id)
            cur = tamp_product_session.execute(sql)
            data = cur.fetchall()
            cur_fund_distribution_df = pd.DataFrame(list(data), columns=['price_date', 'distribution'])
            self.all_fund_distribution[cur_fund_id] = cur_fund_distribution_df

            # 对应基金performance数据
            sql = """select distinct `price_date`, `ret_1w`, `ret_cum_1m`, `ret_cum_6m`, `ret_cum_1y`, `ret_cum_ytd`, `ret_cum_incep` from `fund_performance` where `fund_id`='{}' order by `price_date` ASC""".format(
                cur_fund_id)
            cur = tamp_product_session.execute(sql)
            data = cur.fetchall()
            cur_fund_performance_df = pd.DataFrame(list(data),
            columns=['price_date', 'ret_1w', 'ret_cum_1m', 'ret_cum_6m', 'ret_cum_1y', 'ret_cum_ytd', 'ret_cum_incep'])
            self.all_fund_performance[cur_fund_id] = cur_fund_performance_df
赵杰's avatar
赵杰 committed
113 114 115

            cur_fund_nav_df["price_date"] = pd.to_datetime(cur_fund_nav_df["price_date"])
            cur_fund_nav_df.set_index("price_date", inplace=True)
116 117 118
            all_fund_nav[cur_fund_id] = cur_fund_nav_df["nav"]
            all_fund_cnav[cur_fund_id] = cur_fund_nav_df["cnav"]

赵杰's avatar
赵杰 committed
119
        all_fund_nav = all_fund_nav[all_fund_nav.index <= self.end_date]
120 121
        all_fund_cnav = all_fund_cnav[all_fund_cnav.index <= self.end_date]
        return all_fund_nav, all_fund_cnav
赵杰's avatar
赵杰 committed
122 123

    # 获取客户对比指数净值数据
赵杰's avatar
赵杰 committed
124 125 126 127 128 129 130
    def get_customer_index_nav_data(self, index_id="IN0000007M"):
        sql = "select distinct price_date,close from fund_market_indexes where index_id='{}'  order by price_date ASC".format(index_id)
        cur = tamp_product_session.execute(sql)
        data = cur.fetchall()
        index_df = pd.DataFrame(list(data), columns=['price_date', 'index'])
        index_df["price_date"] = pd.to_datetime(index_df["price_date"])
        index_df.set_index("price_date", inplace=True)
131
        self.fund_cnav_total["index"] = index_df["index"]
赵杰's avatar
赵杰 committed
132 133 134 135 136 137 138 139 140

        return index_df

    # 分组合计算
    def group_operate(self):
        for folio in self.user_customer_order_df["folio_name"].unique():
            cur_folio_order_df = self.user_customer_order_df[self.user_customer_order_df["folio_name"] == folio]
            fund_id_list = list(self.user_customer_order_df["fund_id"].unique())
            cur_folio_nav_df = self.fund_nav_total[fund_id_list]
141 142 143
            # fund_id_list.append("index")
            cur_folio_cnav_df = self.fund_cnav_total[fund_id_list]
            self.signal_folio_operate(folio, cur_folio_order_df, cur_folio_nav_df, cur_folio_cnav_df)
赵杰's avatar
赵杰 committed
144
            continue
赵杰's avatar
赵杰 committed
145

赵杰's avatar
赵杰 committed
146
    # 单个组合数据操作
147
    def signal_folio_operate(self, p_folio, p_order_df, p_nav_df, p_cnav_df):
赵杰's avatar
赵杰 committed
148 149
        start_date = pd.to_datetime(p_order_df["confirm_share_date"].min())

150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176
        cnav_df = p_cnav_df[p_cnav_df.index >= start_date].copy()

        p_fund_id_list = list(p_order_df["fund_id"].unique())
        for p_fund_id in p_fund_id_list:
            order_min_date = p_order_df[p_order_df["fund_id"] == p_fund_id]["confirm_share_date"].min()
            if pd.to_datetime(order_min_date) > start_date:
                cnav_df.loc[:order_min_date - datetime.timedelta(days=1), p_fund_id] = np.nan

        for index, row in p_order_df.iterrows():
            cur_fund_id = str(row["fund_id"])
            confirm_share_date = pd.to_datetime(row["confirm_share_date"])

            # 根据确认净值日查看是否含有累积净值的数据,如果没有按照前后差值推算当天累积净值
            if pd.isnull(cnav_df.loc[confirm_share_date, cur_fund_id]):
                last_nav_data = p_nav_df[p_nav_df.index < confirm_share_date][cur_fund_id].dropna().tail(1)
                last_cnav_data = p_cnav_df[p_cnav_df.index < confirm_share_date][cur_fund_id].dropna().tail(1)
                # 判断上个净值日和当前确认日之中是否存在分红日
                """need add judge"""

                if len(last_nav_data) < 1:
                    cnav_df.loc[confirm_share_date, cur_fund_id] = row["nav"]
                else:
                    diff_nav = row["nav"] - last_nav_data.values[0]
                    cur_cnav = last_cnav_data.values[0] + diff_nav
                    cnav_df.loc[confirm_share_date, cur_fund_id] = cur_cnav

        cnav_df = cnav_df.dropna(axis=0, how="all").fillna(method='ffill')
赵杰's avatar
赵杰 committed
177 178 179 180
        for index, row in p_order_df.iterrows():
            cur_fund_id = str(row["fund_id"])
            confirm_share_date = pd.to_datetime(row["confirm_share_date"])

181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202
            # # 根据确认净值日查看是否含有累积净值的数据,如果没有按照前后差值推算当天累积净值
            # if pd.isnull(cnav_df.loc[confirm_share_date, cur_fund_id]):
            #     last_nav_data = p_nav_df[p_nav_df.index < confirm_share_date][cur_fund_id].dropna().tail(1)
            #     last_cnav_data = p_cnav_df[p_cnav_df.index < confirm_share_date][cur_fund_id].dropna().tail(1)
            #     # 判断上个净值日和当前确认日之中是否存在分红日
            #     """need add judge"""
            #
            #     if len(last_nav_data) < 1:
            #         cnav_df.loc[confirm_share_date, cur_fund_id] = row["nav"]
            #     else:
            #         diff_nav = row["nav"] - last_nav_data.values[0]
            #         cur_cnav = last_cnav_data.values[0] + diff_nav
            #         cnav_df.loc[confirm_share_date, cur_fund_id] = cur_cnav

            if cur_fund_id+"_amount" not in cnav_df:
                profit = cnav_df[cur_fund_id].dropna() - cnav_df[cur_fund_id].dropna().shift(1)
                cnav_df[cur_fund_id + "_profit"] = profit
                cnav_df[cur_fund_id + "_profit_ratio"] = profit / cnav_df[cur_fund_id].dropna().shift(1)
                cnav_df[cur_fund_id+"_amount"] = 0
                cnav_df[cur_fund_id + "_earn"] = 0
                # cnav_df[cur_fund_id + "_cum_earn"] = 0
                cnav_df[cur_fund_id + "_share"] = 0
赵杰's avatar
赵杰 committed
203 204 205

            # buy
            if row['order_type'] == 1:
206 207
                cnav_df.loc[confirm_share_date:, cur_fund_id + "_amount"] += row["confirm_amount"]
                cnav_df.loc[confirm_share_date:, cur_fund_id + "_share"] += row["confirm_share"]
赵杰's avatar
赵杰 committed
208 209
            # sell
            elif row['order_type'] == 2:
210 211
                cnav_df.loc[confirm_share_date:, cur_fund_id + "_amount"] -= row["confirm_amount"]
                cnav_df.loc[confirm_share_date:, cur_fund_id + "_share"] -= row["confirm_share"]
赵杰's avatar
赵杰 committed
212

213 214 215 216 217
            cnav_df[cur_fund_id + "_earn"] = cnav_df[cur_fund_id + "_profit"] * cnav_df[cur_fund_id + "_share"]
            cnav_df[cur_fund_id + "_earn"] = cnav_df[cur_fund_id + "_earn"].apply(lambda x: float(x))
            cnav_df[cur_fund_id + "_cum_earn"] = cnav_df[cur_fund_id + "_earn"].cumsum()
        self.group_data[p_folio] = {"result_cnav_data": cnav_df, "order_df": p_order_df}
        return cnav_df
赵杰's avatar
赵杰 committed
218

219
    # 所有的数据操作
220
    def total_combine_data(self):
221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279

        p_order_df = self.user_customer_order_df.copy()
        p_nav_df = self.fund_nav_total.copy()
        p_cnav_df = self.fund_cnav_total.copy()

        start_date = pd.to_datetime(p_order_df["confirm_share_date"].min())
        cnav_df = p_cnav_df[p_cnav_df.index >= start_date].copy()

        p_fund_id_list = list(p_order_df["fund_id"].unique())
        for p_fund_id in p_fund_id_list:
            order_min_date = p_order_df[p_order_df["fund_id"] == p_fund_id]["confirm_share_date"].min()
            if pd.to_datetime(order_min_date) > start_date:
                cnav_df.loc[:order_min_date - datetime.timedelta(days=1), p_fund_id] = np.nan

        for index, row in p_order_df.iterrows():
            cur_fund_id = str(row["fund_id"])
            confirm_share_date = pd.to_datetime(row["confirm_share_date"])

            # 根据确认净值日查看是否含有累积净值的数据,如果没有按照前后差值推算当天累积净值
            if pd.isnull(cnav_df.loc[confirm_share_date, cur_fund_id]):
                last_nav_data = p_nav_df[p_nav_df.index < confirm_share_date][cur_fund_id].dropna().tail(1)
                last_cnav_data = p_cnav_df[p_cnav_df.index < confirm_share_date][cur_fund_id].dropna().tail(1)
                # 判断上个净值日和当前确认日之中是否存在分红日
                """need add judge"""

                if len(last_nav_data) < 1:
                    cnav_df.loc[confirm_share_date, cur_fund_id] = row["nav"]
                else:
                    diff_nav = row["nav"] - last_nav_data.values[0]
                    cur_cnav = last_cnav_data.values[0] + diff_nav
                    cnav_df.loc[confirm_share_date, cur_fund_id] = cur_cnav

        cnav_df = cnav_df.dropna(axis=0, how="all").fillna(method='ffill')
        for index, row in p_order_df.iterrows():
            cur_fund_id = str(row["fund_id"])
            confirm_share_date = pd.to_datetime(row["confirm_share_date"])
            if cur_fund_id + "_amount" not in cnav_df:
                profit = cnav_df[cur_fund_id].dropna() - cnav_df[cur_fund_id].dropna().shift(1)
                cnav_df[cur_fund_id + "_profit"] = profit
                cnav_df[cur_fund_id + "_profit_ratio"] = profit / cnav_df[cur_fund_id].dropna().shift(1)
                cnav_df[cur_fund_id + "_amount"] = 0
                cnav_df[cur_fund_id + "_earn"] = 0
                # cnav_df[cur_fund_id + "_cum_earn"] = 0
                cnav_df[cur_fund_id + "_share"] = 0

            # buy
            if row['order_type'] == 1:
                cnav_df.loc[confirm_share_date:, cur_fund_id + "_amount"] += row["confirm_amount"]
                cnav_df.loc[confirm_share_date:, cur_fund_id + "_share"] += row["confirm_share"]
            # sell
            elif row['order_type'] == 2:
                cnav_df.loc[confirm_share_date:, cur_fund_id + "_amount"] -= row["confirm_amount"]
                cnav_df.loc[confirm_share_date:, cur_fund_id + "_share"] -= row["confirm_share"]

            cnav_df[cur_fund_id + "_earn"] = cnav_df[cur_fund_id + "_profit"] * cnav_df[cur_fund_id + "_share"]
            cnav_df[cur_fund_id + "_earn"] = cnav_df[cur_fund_id + "_earn"].apply(lambda x: float(x))
            cnav_df[cur_fund_id + "_cum_earn"] = cnav_df[cur_fund_id + "_earn"].cumsum()

        return cnav_df