Add fund rank

c55c7355 · 李宗熹 · 68e3f7f9 · c55c7355 · c55c7355
Commit c55c7355 authored Nov 19, 2020 by 李宗熹
Hide whitespace changes
Inline Side-by-side

Showing with 484 additions and 0 deletions

fund_rank.py app/utils/fund_rank.py +225 -0

week_evaluation.py app/utils/week_evaluation.py +259 -0

No files found.
--- a/app/utils/fund_rank.py
+++ b/app/utils/fund_rank.py
+import pymysql
+import tushare as ts
+import logging
+logging.basicConfig(level=logging.DEBUG)
+
+from week_evaluation import *
+
+
# Module-level connection shared by every query helper in this file.
# NOTE(review): host, user and password are hard-coded in source control;
# they should be rotated and loaded from configuration or the environment.
con = pymysql.connect(host='tamper.mysql.polardb.rds.aliyuncs.com',
                      user='tamp_fund',
                      password='@imeng408',
                      database='tamp_fund',
                      charset='utf8',
                      # A real boolean: the previous string 'True' only worked
                      # because every non-empty string is truthy.
                      use_unicode=True)
+
+
def get_dataframe(fund, start_date, rollback=False):
    """Load the adjusted NAV history of one fund from ``public_fund_nav``.

    Args:
        fund: Fund code matched against the ``ts_code`` column.
        start_date: Earliest date to keep (compared with ``end_date``).
        rollback: When true and ``start_date`` itself has no NAV record, move
            the cut-off back to the closest earlier date that has one.

    Returns:
        DataFrame indexed by ``end_date`` (ascending, one row per date) with
        the ``ts_code`` and ``adj_nav`` columns, or ``None`` when the query
        yields no complete rows for the fund.
    """
    # Bind the fund code as a query parameter instead of formatting it into
    # the SQL text, so quoting is handled by the database driver.
    sql = "SELECT ts_code, end_date, adj_nav FROM public_fund_nav " \
          "WHERE ts_code=%s"
    df = pd.read_sql(sql, con, params=(fund,)).dropna(how='any')
    if df.empty:
        logging.log(logging.ERROR, "CAN NOT FIND {}".format(fund))
        return None

    df['end_date'] = pd.to_datetime(df['end_date'])

    if rollback:
        # Stepping back one day at a time never terminated when start_date
        # preceded every record; select the target date directly instead.
        earlier = df.loc[df['end_date'] <= start_date, 'end_date']
        if not earlier.empty:
            start_date = earlier.max()

    # Chained, non-inplace calls avoid mutating a filtered view of df.
    df = (df[df['end_date'] >= start_date]
          .drop_duplicates(subset='end_date', keep='first')
          .set_index('end_date')
          .sort_index(ascending=True))
    return df
+
+
def get_frequency(df):
    """Infer how many NAV observations per year a fund publishes.

    The most common gap (in whole days) between consecutive index entries is
    mapped to an annualisation factor: 250 for gaps of 0-2 days, 52 for 6-8,
    24 for 13-17, 12 for 28-32 and 3 for 110-132.

    Args:
        df: DataFrame whose index holds the observation dates in order.

    Returns:
        int: One of 250, 52, 24, 12 or 3.

    Raises:
        ValueError: If there are fewer than two dated rows, or the typical
            gap falls outside every recognised band.
    """
    if len(df.index) < 2:
        raise ValueError('need at least two observations to infer a frequency')

    index_series = df.index.to_series()
    gaps = index_series - index_series.shift(1)
    logging.log(logging.INFO, gaps.describe())

    typical = gaps.mode()
    if typical.empty:
        raise ValueError('no valid gap between observations')
    days = typical.iloc[0].days

    bands = (
        (range(0, 3), 250),
        (range(6, 9), 52),
        (range(13, 18), 24),
        (range(28, 33), 12),
        (range(110, 133), 3),
    )
    for band, periods_per_year in bands:
        if days in band:
            return periods_per_year
    raise ValueError(
        'unsupported NAV frequency: typical gap is {} days'.format(days))
+
+
def get_trade_cal(start_date, end_date, method):
    """Fetch the open trading days as a calendar table.

    Both sources now return the same shape: a ``cal_date`` column as
    delivered by the source, an ``end_date`` column parsed to datetimes, and
    an index equal to ``end_date``. Previously only the mysql branch did so,
    while the tushare branch returned string dates on a default index.

    Args:
        start_date: First date requested from tushare; not used by the mysql
            branch, which returns every open day in ``stock_trade_cal``.
        end_date: Last date requested from tushare, or ``None`` for no upper
            bound; not used by the mysql branch.
        method: ``'mysql'`` or ``'tushare'``.

    Returns:
        DataFrame of open trading days indexed by ``end_date``.

    Raises:
        ValueError: If ``method`` is not one of the supported sources.
    """
    if method == 'mysql':
        sql = 'SELECT cal_date FROM stock_trade_cal WHERE is_open=1'
        df = pd.read_sql(sql, con)
    elif method == 'tushare':
        # NOTE(review): the API token is hard-coded; it should be rotated and
        # read from configuration or the environment.
        ts.set_token('ac1f734f8a25651aa07319ca35b1b0c0854e361e306fe85d85e092bc')
        pro = ts.pro_api()
        query = {'exchange': 'SSE', 'start_date': start_date, 'is_open': '1'}
        if end_date is not None:
            query['end_date'] = end_date
        df = pro.trade_cal(**query)
        df = df.drop(['exchange', 'is_open'], axis=1)
    else:
        # Falling through used to hit an unbound local variable.
        raise ValueError(
            "unknown trade calendar method {!r}; "
            "expected 'mysql' or 'tushare'".format(method))

    df['end_date'] = pd.to_datetime(df['cal_date'])
    df.set_index('end_date', drop=False, inplace=True)
    return df
+
+
def get_manager():
    """Return the fund managers whose tenure has no end date.

    Returns:
        DataFrame with the ``ts_code`` and ``name`` columns read from
        ``public_fund_manager`` through the module-level connection.
    """
    query = 'SELECT ts_code, name FROM public_fund_manager WHERE end_date IS NULL'
    return pd.read_sql(query, con)
+
+
def get_fund_info(end_date):
    """Return basic information and managers of funds still live at ``end_date``.

    A fund qualifies when it has no delisting date and either no due date or
    a due date later than ``end_date``.

    Args:
        end_date: Date object providing ``strftime``; formatted as YYYYMMDD
            for the comparison with ``due_date``.

    Returns:
        DataFrame with ``ts_code``, ``fund_type``, ``management`` and the
        manager ``name`` (left-joined on ``ts_code``, so a fund appears once
        per matching manager row).
    """
    cutoff = end_date.strftime('%Y%m%d')
    query = ("SELECT ts_code, fund_type, management FROM public_fund_basic "
             "WHERE delist_date IS NULL AND (due_date IS NULL OR due_date>'{}')").format(cutoff)
    funds = pd.read_sql(query, con).dropna(how='all')
    return pd.merge(funds, get_manager(), how="left", on='ts_code')
+
+
def resample(df, trading_cal, freq):
    """Resample a NAV table to its publishing frequency and align it to trading days.

    Each resampled date that is not a trading day is moved back to the
    nearest earlier trading day within the frequency's offset window; dates
    that still do not match the calendar are dropped by the final inner join.

    Args:
        df (DataFrame): Raw NAV table indexed by date.
        trading_cal (DataFrame): Trading calendar with an ``end_date`` column
            of trading days (datetimes or parseable date strings).
        freq (int): Observations per year, as returned by ``get_frequency``:
            250 (business day), 52 (week ending Friday), 24 (semi-month),
            12 (month) or 3 (mapped to the quarterly rule).

    Returns:
        DataFrame: Resampled NAV rows joined with the calendar rows on the
        (adjusted) date index.

    Raises:
        KeyError: If ``freq`` is not one of the supported values.
    """
    freq_dict = {250: 'B', 52: 'W-FRI', 12: 'M', 24: 'SM', 3: 'Q'}
    # Largest look-back (exclusive) per frequency, chosen so the shifted date
    # stays inside the same week / half-month / month / quarter.
    timeoffset_dict = {250: 1, 52: 5, 12: 30, 24: 15, 3: 120}
    resample_freq = freq_dict[freq]
    timeoffsetmax = timeoffset_dict[freq]

    # Resample at the target frequency, forward-filling the NAV.
    df = df.resample(rule=resample_freq).ffill()

    # `x in series` tests the Series *index*, not its values, so membership
    # is checked against an explicit set built once from the date values.
    calendar_dates = pd.DatetimeIndex(pd.to_datetime(trading_cal['end_date']),
                                      name=trading_cal.index.name)
    trading_days = set(calendar_dates)

    new_index = list(df.index)
    for idx, date in enumerate(df.index):
        if date in trading_days:
            continue
        for time_offset in range(1, timeoffsetmax):
            candidate = date - datetime.timedelta(days=time_offset)
            if candidate in trading_days:
                # The first (closest) earlier trading day wins.
                new_index[idx] = candidate
                break

    df.index = pd.DatetimeIndex(new_index)

    # Join on the calendar's date values so the result does not depend on
    # how the caller happened to index trading_cal.
    calendar = trading_cal.copy()
    calendar.index = calendar_dates
    return pd.merge(df, calendar, how='inner', left_index=True, right_index=True)
+
+
def z_score(annual_return_rank, downside_risk_rank, max_drawdown_rank, sharp_ratio_rank):
    """Combine four rank values into one score using equal weights of 25.

    Args:
        annual_return_rank: Rank of the annualised return.
        downside_risk_rank: Rank of the downside risk.
        max_drawdown_rank: Rank of the maximum drawdown.
        sharp_ratio_rank: Rank of the Sharpe ratio.

    Returns:
        The weighted sum of the four inputs (scalar or element-wise).
    """
    weight = 25
    score = weight * annual_return_rank
    score = score + weight * downside_risk_rank
    score = score + weight * max_drawdown_rank
    score = score + weight * sharp_ratio_rank
    return score
+
+
def cal_date(date, period_type, period):
    """Return the date that lies ``period`` years, months or days before ``date``.

    For year and month shifts the result is midnight of the target day; when
    the target month is shorter than ``date.day`` (for example 31 March minus
    one month, or 29 February minus one year) the day is clamped to the last
    day of that month instead of raising.

    Args:
        date: ``datetime.datetime`` to shift.
        period_type: ``'Y'`` for years, ``'m'`` for months, ``'d'`` for days.
        period: Number of units to go back.

    Returns:
        datetime.datetime: The shifted date.

    Raises:
        ValueError: If ``period_type`` is not ``'Y'``, ``'m'`` or ``'d'``.
    """
    import calendar

    def _clamped(year, month, day):
        # Build the date, capping the day at the month's length.
        last_day = calendar.monthrange(year, month)[1]
        return datetime.datetime(year, month, min(day, last_day))

    if period_type == 'Y':
        return _clamped(date.year - period, date.month, date.day)
    if period_type == 'm':
        # Count months from year 0 so shifts of more than 12 months borrow
        # the right number of years.
        year, month_index = divmod(date.year * 12 + (date.month - 1) - period, 12)
        return _clamped(year, month_index + 1, date.day)
    if period_type == 'd':
        return date - datetime.timedelta(days=period)
    raise ValueError(
        "period_type must be 'Y', 'm' or 'd', got {!r}".format(period_type))
+
+
def metric_rank(df):
    """Add percentile ranks of each metric within its ``invest_type`` group.

    For every ranked metric a ``<metric>_rank`` column is added in place.
    ``downside_risk`` and ``max_drawdown`` are ranked descending, so smaller
    values receive a higher percentile; the other metrics are ranked
    ascending. ``sharp_ratio`` is ranked as well when the column exists,
    because the composite score in this module reads ``sharp_ratio_rank``.

    Args:
        df: DataFrame with an ``invest_type`` column and the metric columns
            ``annual_return``, ``downside_risk``, ``max_drawdown`` and
            ``sortino_ratio`` (``sharp_ratio`` optional).

    Returns:
        The same DataFrame, with the rank columns added.
    """
    lower_is_better = ('downside_risk', 'max_drawdown')
    optional = ('sharp_ratio',)
    metrics = ['annual_return', 'downside_risk', 'max_drawdown',
               'sortino_ratio', 'sharp_ratio']
    for metric in metrics:
        if metric in optional and metric not in df.columns:
            continue
        # Rows assembled from mixed-type records can leave object-dtype
        # columns; rank their numeric values.
        values = pd.to_numeric(df[metric], errors='coerce')
        df['{}_rank'.format(metric)] = values.groupby(df['invest_type']).rank(
            ascending=metric not in lower_is_better, pct=True)
    return df
+
+
def public_fund_rank(start_date, end_date):
    """Compute performance metrics and a composite score for every live public fund.

    For each fund the NAV history from ``start_date`` is loaded, resampled to
    its own publishing frequency on trading days, and turned into return and
    risk metrics. Metrics are then percentile-ranked within each fund type
    and combined with ``z_score``.

    A fund is skipped (and logged) when it has no NAV data, when its history
    covers less than 60% of the requested window, when its frequency cannot
    be inferred, or when fewer than two observations survive resampling.

    Args:
        start_date: Start of the evaluation window (datetime).
        end_date: End of the evaluation window (datetime).

    Returns:
        DataFrame indexed by ``ts_code`` with the metric columns, their
        ``*_rank`` columns and ``z_score``.
    """
    fund_info = get_fund_info(end_date)
    grouped_fund = fund_info.groupby('fund_type')['ts_code'].unique()

    trading_cal = get_trade_cal(start_date, end_date, method='mysql')

    columns = ['ts_code', 'range_return', 'annual_return', 'max_drawdown',
               'sharp_ratio', 'volatility', 'sortino_ratio', 'downside_risk',
               'invest_type', 'manager', 'management']
    min_span = 0.6 * (end_date - start_date)

    rows = []
    skipped_funds = []
    for invest_type in grouped_fund.index:
        for fund in grouped_fund[invest_type]:
            df = get_dataframe(fund, start_date)
            if df is None or df.empty:
                skipped_funds.append(fund)
                logging.log(logging.INFO, 'Skipped {}'.format(fund))
                continue

            # Previously a short history was recorded as skipped but the
            # fund was still ranked; actually skip it.
            if df.index[-1] - df.index[0] < min_span:
                skipped_funds.append(fund)
                logging.log(logging.INFO, 'Skipped {}'.format(fund))
                continue

            try:
                n = get_frequency(df)
            except (ValueError, KeyError, IndexError) as e:
                skipped_funds.append(fund)
                logging.log(logging.ERROR, repr(e))
                logging.log(logging.INFO, 'Skipped {}'.format(fund))
                continue

            df = resample(df, trading_cal, n)
            # The metrics need at least one return; an unguarded frequency
            # check here used to abort the whole run instead.
            if len(df) < 2:
                skipped_funds.append(fund)
                logging.log(logging.INFO, 'Skipped {}'.format(fund))
                continue

            logging.log(logging.INFO, "Dealing with {}".format(fund))
            net_worth = df['adj_nav'].astype(float)

            end_df, begin_df = net_worth.values[-1], net_worth.values[0]

            sim_return = simple_return(net_worth)
            ex_return = excess_return(sim_return, bank_rate=0.015, n=n)
            drawdown = float(max_drawdown(net_worth)[0])
            shp_ratio = sharpe_ratio(ex_return, sim_return, n)
            rng_return = float(range_return(end_df, begin_df))
            ann_return = annual_return(rng_return, net_worth, n)
            vol = volatility(sim_return, n)
            down_risk = downside_risk(sim_return, bank_rate=0.015, n=n)
            sor_ratio = sortino_ratio(ex_return, down_risk, n)

            fund_rows = fund_info[fund_info['ts_code'] == fund]
            rows.append({
                'ts_code': fund,
                'range_return': rng_return,
                'annual_return': ann_return,
                'max_drawdown': drawdown,
                'sharp_ratio': shp_ratio,
                'volatility': vol,
                'sortino_ratio': sor_ratio,
                'downside_risk': down_risk,
                'invest_type': invest_type,
                'manager': fund_rows['name'].values,
                'management': fund_rows['management'].values,
            })

    if skipped_funds:
        logging.log(logging.INFO,
                    'Skipped {} funds in total'.format(len(skipped_funds)))

    # Build the frame once: per-row DataFrame.append is quadratic and no
    # longer exists in pandas 2.x.
    metric_df = pd.DataFrame(rows, columns=columns)
    metric_df.set_index('ts_code', inplace=True)

    df = metric_rank(metric_df)
    if 'sharp_ratio_rank' not in df.columns:
        # The composite score reads the Sharpe rank; compute it here when
        # metric_rank did not provide it.
        df['sharp_ratio_rank'] = df.groupby('invest_type')['sharp_ratio'].rank(
            ascending=True, pct=True)
    df['z_score'] = z_score(df['annual_return_rank'],
                            df['downside_risk_rank'],
                            df['max_drawdown_rank'],
                            df['sharp_ratio_rank'])
    return df
+
+
if __name__ == '__main__':
    # Rank all live public funds over the year ending yesterday and write
    # the result to a GBK-encoded CSV in the working directory.
    end_date = datetime.datetime.now() - datetime.timedelta(days=1)
    start_date = cal_date(end_date, 'Y', 1)
    # Keep the result under its own name so the public_fund_rank function
    # is not rebound to a DataFrame.
    rank_df = public_fund_rank(start_date, end_date)
    rank_df.to_csv('public_fund_rank.csv', encoding='gbk')
--- a/app/utils/week_evaluation.py
+++ b/app/utils/week_evaluation.py
+# coding: utf-8
+
+"""
+计算各个指标的方法
+
+"""
+import pandas as pd
+import numpy as np
+import datetime
+import calendar
+import math
+
+
def simple_return(net_worth):
    """Compute period-over-period simple returns.

    Args:
        net_worth: pandas Series of NAV or index levels in time order.

    Returns:
        Series of the same length where each entry is ``value / previous - 1``
        and the first entry is 0. An empty input yields an empty Series.
    """
    d = net_worth / net_worth.shift(1) - 1
    # The first observation has no predecessor; define its return as 0.
    # Guarded so an empty series no longer raises IndexError.
    if len(d) > 0:
        d.iloc[0] = 0
    return d
+
+
def excess_return(returns, bank_rate, n):
    """Return the mean per-period return in excess of a benchmark rate.

    Args:
        returns: Series of simple returns.
        bank_rate: Annual risk-free (or other benchmark) rate.
        n: Observations per year: 250 daily, 52 weekly, 12 monthly.

    Returns:
        A single number: ``returns.mean()`` minus ``bank_rate / n``.
    """
    per_period_rate = bank_rate / n
    return returns.mean() - per_period_rate
+
+
def sharpe_ratio(excess_return, simple_return, n):
    """Compute the annualised Sharpe ratio.

    Args:
        excess_return: Excess return, either a single number (as produced by
            the ``excess_return`` helper in this module) or a series whose
            mean is used.
        simple_return: Series of simple returns; its sample standard
            deviation (ddof=1) is the risk measure.
        n: Observations per year: 250 daily, 52 weekly, 12 monthly.

    Returns:
        ``sqrt(n) * mean(excess_return) / std(simple_return)``.
    """
    # np.mean accepts plain floats as well as arrays and Series, whereas
    # calling .mean() on a Python float raises AttributeError.
    d = math.sqrt(n) * np.mean(excess_return) / simple_return.std(ddof=1)
    return d
+
+
def volatility(simple_return, n):
    """Compute the annualised volatility of a return series.

    Args:
        simple_return: Series of simple returns.
        n: Observations per year: 250 daily, 52 weekly, 12 monthly.

    Returns:
        The sample standard deviation (ddof=1) scaled by ``sqrt(n)``.
    """
    annualiser = math.sqrt(n)
    sample_std = simple_return.std(ddof=1)
    return annualiser * sample_std
+
+
def IR(excess_return, n):
    """Compute the annualised information ratio.

    Args:
        excess_return: Series of returns net of the benchmark return.
        n: Observations per year.

    Returns:
        ``sqrt(n) * mean / std`` of the series, using the sample standard
        deviation (ddof=1).
    """
    scaled_mean = math.sqrt(n) * excess_return.mean()
    return scaled_mean / excess_return.std(ddof=1)
+
+
def max_drawdown(return_list):
    """Compute the maximum drawdown of a NAV or index series.

    Args:
        return_list: Sequence of NAV or index levels in time order (list,
            NumPy array or pandas Series with any index).

    Returns:
        Tuple ``(drawdown, start, end)``: the largest peak-to-trough loss as
        a fraction of the peak, and the positions of that peak and trough.
        ``(0, 0, 0)`` when there is no drawdown or fewer than two values.
    """
    # Work on a plain array so the positions from argmax are used as
    # positions, never as Series labels (e.g. on a date-indexed Series).
    values = np.asarray(return_list, dtype=float)
    if values.size < 2:
        return 0, 0, 0

    running_peak = np.maximum.accumulate(values)
    i = np.argmax((running_peak - values) / running_peak)  # trough position
    if i == 0:
        return 0, 0, 0  # no drawdown
    j = np.argmax(values[:i])  # peak position
    return (values[j] - values[i]) / values[j], j, i
+
+
def month_differ(x, y):
    """Return the absolute number of calendar months between two dates.

    Only the year and month of each date are considered; the day is ignored.

    Args:
        x: Object with ``year`` and ``month`` attributes (e.g. a datetime).
        y: Object with ``year`` and ``month`` attributes.

    Returns:
        int: Non-negative month difference.
    """
    year_gap = x.year - y.year
    month_gap = x.month - y.month
    return abs(year_gap * 12 + month_gap * 1)
+
+
def downside_risk(r, bank_rate, n):
    """Compute the downside deviation of returns below a per-period target.

    Args:
        r: Simple returns (pandas Series or any sequence of numbers).
        bank_rate: Annual target rate; ``bank_rate / n`` is the per-period
            threshold.
        n: Observations per year.

    Returns:
        Square root of the sum of squared shortfalls below the threshold
        divided by ``len(r) - 1``. With a single observation the divisor is
        zero and the result is not finite.
    """
    returns = pd.Series(r)
    # Shortfall is min(return - threshold, 0), computed vectorised; the sign
    # is irrelevant once squared.
    shortfall = (returns - bank_rate / n).clip(upper=0)
    count = len(shortfall)
    risk = np.sqrt((shortfall ** 2).mean() * count / (count - 1))
    return risk
+
+
def sortino_ratio(excess_return, downside, n):
    """Compute the annualised Sortino ratio.

    Args:
        excess_return: Excess return, either a single number or a series
            whose mean is used.
        downside: Downside risk used as the denominator.
        n: Observations per year.

    Returns:
        ``sqrt(n) * mean(excess_return) / downside``.
    """
    # np.mean handles plain floats too; .mean() on a Python float raises.
    ratio = math.sqrt(n) * np.mean(excess_return) / downside
    return ratio
+
+
def month_minus(date, n):
    """Return the date ``n`` months before ``date``, at midnight.

    When the target month has no such day (for example 31 March minus one
    month), the last day of the target month is used.

    Args:
        date: ``datetime.datetime`` to shift.
        n: Number of months to go back; may exceed 12.

    Returns:
        datetime.datetime: The shifted date.
    """
    # Count months from year 0 so any n borrows the right number of years.
    year, month_index = divmod(date.year * 12 + (date.month - 1) - n, 12)
    month = month_index + 1
    last_day = calendar.monthrange(year, month)[1]
    return datetime.datetime(year, month, min(date.day, last_day))
+
+
def is_exsits(a, b):
    """Tell whether the fund already existed at the requested start date.

    Args:
        a: Inception date of the fund.
        b: Start date of the period whose return is to be computed.

    Returns:
        bool: ``True`` when ``a`` is strictly earlier than ``b``, else
        ``False``.
    """
    return bool(a < b)
+
+
def year_minus(date, n):
    """Return the date ``n`` years before ``date``, at midnight.

    29 February maps to 28 February when the target year is not a leap year
    instead of raising ``ValueError``.

    Args:
        date: ``datetime.datetime`` to shift.
        n: Number of years to go back.

    Returns:
        datetime.datetime: The shifted date.
    """
    year = date.year - n
    last_day = calendar.monthrange(year, date.month)[1]
    pre_date = datetime.datetime(year, date.month, min(date.day, last_day))
    return pre_date
+
+
def range_return(end_df, begin_df):
    """Compute the total return over an interval.

    Args:
        end_df: Value at the end of the interval.
        begin_df: Value at the start of the interval.

    Returns:
        ``end_df / begin_df - 1``.
    """
    growth = end_df / begin_df
    return growth - 1
+
+
def annual_return(range_return, df, n):
    """Annualise a total return by compounding.

    Args:
        range_return: Total return over the whole interval.
        df: The observations the return was computed from; only its length
            is used.
        n: Observations per year.

    Returns:
        ``(1 + range_return) ** (n / len(df)) - 1``.
    """
    exponent = n / len(df)
    return (1 + range_return) ** exponent - 1
+
+
def gain_loss_ratio(simple_return):
    """Compute the ratio of total gains to total losses.

    Args:
        simple_return: Series of simple returns.

    Returns:
        The sum of non-negative returns divided by the sum of negative
        returns, negated so that the ratio is positive.
    """
    is_loss = simple_return < 0
    gains = simple_return[simple_return >= 0].sum()
    losses = simple_return[is_loss].sum()
    return -gains / losses
+
+
def alpha_beta(simple_return, b_simple_return, n):
    """Estimate annualised alpha and beta against a benchmark by least squares.

    The fund returns are regressed on the benchmark returns with an
    intercept. As before, the last observation is left out of the fit. The
    closed-form solution is computed with NumPy, so scikit-learn is no longer
    needed, and rows where either series is missing are ignored.

    Args:
        simple_return: Series of fund simple returns.
        b_simple_return: Series of benchmark simple returns, aligned with
            ``simple_return`` by index.
        n: Observations per year, used to annualise the intercept.

    Returns:
        Tuple ``(alpha, beta)`` where ``alpha`` is the intercept times ``n``
        and ``beta`` is the slope.

    Raises:
        ValueError: If fewer than two complete observations are available.
    """
    df = pd.DataFrame()
    df['returns'] = simple_return
    df['b_returns'] = b_simple_return
    sample = df.iloc[:len(simple_return) - 1].dropna()
    if len(sample) < 2:
        raise ValueError('need at least two complete observations to fit')

    x = sample['b_returns'].to_numpy(dtype=float)
    y = sample['returns'].to_numpy(dtype=float)
    x_centered = x - x.mean()
    spread = np.sum(x_centered ** 2)
    # A constant benchmark carries no slope information; report beta 0.
    beta = np.sum(x_centered * (y - y.mean())) / spread if spread > 0 else 0.0
    intercept = y.mean() - beta * x.mean()
    alpha = intercept * n
    return alpha, beta
+
+
def win_rate(simple_return, b_simple_return):
    """Compute the share of periods in which the fund matched or beat the benchmark.

    Args:
        simple_return: Fund simple returns.
        b_simple_return: Benchmark simple returns.

    Returns:
        Number of periods with a non-negative return difference divided by
        the number of periods where the difference is defined.
    """
    diff = pd.Series(simple_return - b_simple_return)
    wins = (diff >= 0).sum()
    return wins / diff.count()
+
+
def lpm(returns, threshold, order):
    """Compute the lower partial moment of a return series.

    Args:
        returns: Returns as a list, NumPy array or pandas Series.
        threshold: Target (expected) return.
        order: Moment order, typically 1 or 2.

    Returns:
        Mean over all observations of ``max(threshold - return, 0) ** order``.
    """
    # np.clip works for every array-like; calling .clip(min=0) on the
    # difference raised TypeError whenever `returns` was a pandas Series.
    shortfall = np.clip(threshold - np.asarray(returns, dtype=float), 0, None)
    return np.sum(shortfall ** order) / len(shortfall)
+
+
def var(returns, alpha):
    """Compute value at risk by historical simulation.

    The returns are sorted ascending and the observation at position
    ``int(alpha * len(returns))`` is taken.

    Args:
        returns: Historical returns (non-empty array-like).
        alpha: Quantile level between 0 and 1.

    Returns:
        The absolute value of the selected return, so VaR is non-negative.
    """
    sorted_returns = np.sort(returns)
    # Clamp so alpha == 1 selects the last observation instead of indexing
    # one past the end.
    index = min(int(alpha * len(sorted_returns)), len(sorted_returns) - 1)
    return abs(sorted_returns[index])
+
+
def cvar(returns, alpha):
    """Compute conditional value at risk by historical simulation.

    The returns are sorted ascending and the worst
    ``int(alpha * len(returns))`` observations are averaged. At least one
    observation is always used, so a small ``alpha`` no longer divides by
    zero.

    Args:
        returns: Historical returns (non-empty array-like).
        alpha: Tail fraction between 0 and 1.

    Returns:
        The absolute value of the tail average, so CVaR is non-negative.
    """
    sorted_returns = np.sort(returns)
    index = int(alpha * len(sorted_returns))
    tail = sorted_returns[:max(index, 1)]
    return abs(tail.sum() / len(tail))