Commit c55c7355 authored by 李宗熹's avatar 李宗熹

Add fund rank

parent 68e3f7f9
import pymysql
import tushare as ts
import logging
logging.basicConfig(level=logging.DEBUG)
from week_evaluation import *
# Module-level MySQL connection used by the query helpers in this file; it is
# opened as a side effect of importing the module.
# NOTE(review): host, user and password are hardcoded in source -- consider
# loading them from configuration/environment and rotating the password.
# NOTE(review): use_unicode is given the string 'True' rather than a boolean --
# confirm that is intended.
con = pymysql.connect(host='tamper.mysql.polardb.rds.aliyuncs.com',
user='tamp_fund',
password='@imeng408',
database='tamp_fund',
charset='utf8',
use_unicode='True')
def get_dataframe(fund, start_date, rollback=False):
    """Load the adjusted NAV history of one fund from ``public_fund_nav``.

    Args:
        fund: Fund code matched against the ``ts_code`` column.
        start_date: Earliest ``end_date`` to keep (datetime-like).
        rollback: When true and ``start_date`` has no NAV record, move
            ``start_date`` back to the latest earlier date that has one so
            the returned frame starts on an actual observation. If no such
            date exists, ``start_date`` is left unchanged.

    Returns:
        A DataFrame indexed by ``end_date`` (ascending, duplicate dates
        removed) holding ``ts_code`` and ``adj_nav``, or ``None`` when the
        query yields no usable NAV rows.
    """
    # Bind the fund code as a query parameter instead of formatting it into
    # the SQL text, so a malformed code cannot alter the statement.
    sql = ("SELECT ts_code, end_date, adj_nav FROM public_fund_nav "
           "WHERE ts_code=%s")
    df = pd.read_sql(sql, con, params=(fund,)).dropna(how='any')
    if df['adj_nav'].count() == 0:
        logging.log(logging.ERROR, "CAN NOT FIND {}".format(fund))
        return None
    df['end_date'] = pd.to_datetime(df['end_date'])
    if rollback:
        # Jump straight to the latest recorded date on or before start_date.
        # Stepping back one day at a time never terminates normally when no
        # such date exists (or when start_date carries a time of day).
        earlier = df.loc[df['end_date'] <= start_date, 'end_date']
        if not earlier.empty:
            start_date = earlier.max()
    df = df[df['end_date'] >= start_date]
    # Non in-place operations: ``df`` is a filtered slice at this point.
    df = df.drop_duplicates(subset='end_date', keep='first')
    df = df.set_index('end_date').sort_index(ascending=True)
    return df
def get_frequency(df):
    """Infer the number of observations per year from the index spacing.

    The most common gap (in days) between consecutive index entries is
    mapped to a periods-per-year constant.

    Args:
        df: DataFrame whose index holds the observation dates.

    Returns:
        250 (daily), 52 (weekly), 24 (semi-monthly), 12 (monthly) or
        3 (gaps of 110-132 days).

    Raises:
        ValueError: If there are fewer than two observations or the typical
            gap does not fall into any supported range.
    """
    if len(df.index) < 2:
        # With a single row there is no gap to measure; fail with a clear
        # message instead of an opaque lookup error on an empty mode().
        raise ValueError("need at least two observations to infer a frequency")
    index_series = df.index.to_series()
    gaps = index_series - index_series.shift(1)
    logging.log(logging.INFO, gaps.describe())
    typical_gap = gaps.mode()[0].days
    periods_by_gap = (
        (range(0, 3), 250),
        (range(6, 9), 52),
        (range(13, 18), 24),
        (range(28, 33), 12),
        (range(110, 133), 3),
    )
    for day_range, periods in periods_by_gap:
        if typical_gap in day_range:
            return periods
    raise ValueError(
        "unsupported sampling interval of {} day(s)".format(typical_gap))
def get_trade_cal(start_date, end_date, method):
    """Fetch the calendar of open trading days.

    Args:
        start_date: First date requested; only used by the ``'tushare'``
            source.
        end_date: Last date requested, or ``None`` for no upper bound; only
            used by the ``'tushare'`` source.
        method: ``'mysql'`` to read ``stock_trade_cal`` through the module
            connection, or ``'tushare'`` to query the tushare ``trade_cal``
            API for the SSE exchange.

    Returns:
        For ``'mysql'``: a DataFrame with ``cal_date`` and a datetime
        ``end_date`` column that is also the index. For ``'tushare'``: the
        API result with ``exchange``/``is_open`` dropped and ``cal_date``
        renamed to ``end_date`` (no datetime conversion is applied here).

    Raises:
        ValueError: If ``method`` is not one of the supported sources.
    """
    if method not in ('mysql', 'tushare'):
        # Previously an unknown method fell through to ``return df`` and
        # failed with an unrelated UnboundLocalError.
        raise ValueError(
            "unknown trade calendar source: {!r}".format(method))
    if method == 'mysql':
        sql = 'SELECT cal_date FROM stock_trade_cal WHERE is_open=1'
        df = pd.read_sql(sql, con)
        df['end_date'] = pd.to_datetime(df['cal_date'])
        df.set_index('end_date', drop=False, inplace=True)
    else:
        # NOTE(review): the API token is hardcoded in source -- consider
        # moving it to configuration and rotating it.
        ts.set_token('ac1f734f8a25651aa07319ca35b1b0c0854e361e306fe85d85e092bc')
        pro = ts.pro_api()
        if end_date is not None:
            df = pro.trade_cal(exchange='SSE', start_date=start_date, end_date=end_date, is_open='1')
        else:
            df = pro.trade_cal(exchange='SSE', start_date=start_date, is_open='1')
        df.drop(['exchange', 'is_open'], axis=1, inplace=True)
        df.rename(columns={'cal_date': 'end_date'}, inplace=True)
    return df
def get_manager():
    """Return ``ts_code``/``name`` rows of ``public_fund_manager`` whose ``end_date`` is NULL."""
    query = 'SELECT ts_code, name FROM public_fund_manager WHERE end_date IS NULL'
    return pd.read_sql(query, con)
def get_fund_info(end_date):
    """Return basic information for funds not delisted and not due by ``end_date``.

    Args:
        end_date: Object with ``strftime``; formatted as ``YYYYMMDD`` and
            compared against ``due_date`` in the query.

    Returns:
        DataFrame of ``ts_code``, ``fund_type`` and ``management`` from
        ``public_fund_basic``, left-joined on ``ts_code`` with the rows
        returned by ``get_manager()``.
    """
    cutoff = end_date.strftime('%Y%m%d')
    query = "SELECT ts_code, fund_type, management FROM public_fund_basic " \
            "WHERE delist_date IS NULL AND (due_date IS NULL OR due_date>'{}')".format(cutoff)
    funds = pd.read_sql(query, con).dropna(how='all')
    return pd.merge(funds, get_manager(), how="left", on='ts_code')
def resample(df, trading_cal, freq):
    """Resample a NAV table and align the sampled dates to trading days.

    The table is resampled with forward fill at the granularity selected by
    ``freq``. Every sampled date that is not a trading day is moved back to
    the nearest earlier trading day within a bounded window; dates that still
    do not match are dropped by the final inner join.

    Args:
        df: NAV table indexed by date.
        trading_cal: Trading calendar. Membership is tested against its
            index, which is also the join key of the final merge, so it must
            be indexed by trading date.
        freq: Observations per year, one of 250 (business day), 52 (week
            ending Friday), 24 (semi-month), 12 (month end), 3 (quarter end).

    Returns:
        The inner join (on index) of the aligned, resampled NAV table with
        ``trading_cal``.

    Raises:
        KeyError: If ``freq`` is not one of the supported values.
    """
    freq_dict = {250: 'B', 52: 'W-FRI', 12: 'M', 24: 'SM', 3: 'Q'}
    # Upper bound (exclusive) on how many days a sampled date may be moved
    # back, chosen per granularity to limit how far a date can move.
    timeoffset_dict = {250: 1, 52: 5, 12: 30, 24: 15, 3: 120}
    rule = freq_dict[freq]
    max_offset = timeoffset_dict[freq]

    resampled = df.resample(rule=rule).ffill()

    # ``x in some_series`` tests the index labels, not the values, so use the
    # calendar index explicitly: same semantics, but visible to the reader.
    trading_days = trading_cal.index
    aligned = []
    for date in resampled.index:
        if date not in trading_days:
            for days_back in range(1, max_offset):
                candidate = date - datetime.timedelta(days=days_back)
                if candidate in trading_days:
                    # The first (closest) earlier trading day wins.
                    date = candidate
                    break
        aligned.append(date)
    # A DatetimeIndex keeps the dtype datetime-like even when empty.
    resampled.index = pd.DatetimeIndex(aligned)
    return pd.merge(resampled, trading_cal, how='inner', left_index=True, right_index=True)
def z_score(annual_return_rank, downside_risk_rank, max_drawdown_rank, sharp_ratio_rank):
    """Combine four rank values into one score, each weighted by 25."""
    weight = 25
    score = weight * annual_return_rank + weight * downside_risk_rank
    score = score + weight * max_drawdown_rank
    score = score + weight * sharp_ratio_rank
    return score
def cal_date(date, period_type, period):
    """Return the date lying ``period`` units before ``date``.

    Args:
        date: Reference ``datetime.datetime``.
        period_type: ``'Y'`` for years, ``'m'`` for months, ``'d'`` for days.
        period: Number of units to go back.

    Returns:
        For ``'Y'`` and ``'m'`` a ``datetime.datetime`` at midnight; the day
        of month is clamped to the length of the target month (e.g. Feb 29
        minus one year gives Feb 28). For ``'d'`` the time of day of ``date``
        is preserved.

    Raises:
        ValueError: If ``period_type`` is not ``'Y'``, ``'m'`` or ``'d'``.
    """
    import calendar  # local import: only needed for month-length clamping

    if period_type == 'd':
        return date - datetime.timedelta(days=period)
    if period_type == 'Y':
        months_back = 12 * period
    elif period_type == 'm':
        months_back = period
    else:
        # Previously fell through and returned None silently.
        raise ValueError("unknown period_type: {!r}".format(period_type))

    # Work in absolute month numbers so any number of months, including more
    # than twelve, rolls over the year boundary correctly.
    year, month_index = divmod(date.year * 12 + (date.month - 1) - months_back, 12)
    month = month_index + 1
    day = min(date.day, calendar.monthrange(year, month)[1])
    return datetime.datetime(year, month, day)
def metric_rank(df):
    """Add a percentile-rank column per metric, ranked within ``invest_type``.

    For ``annual_return`` and ``sortino_ratio`` the ranking is ascending; for
    ``downside_risk`` and ``max_drawdown`` it is descending. Each result is
    stored in a ``<metric>_rank`` column.

    Args:
        df: DataFrame with an ``invest_type`` column and the four metric
            columns; it is modified in place.

    Returns:
        The same DataFrame with the rank columns added.
    """
    rank_directions = (
        ('annual_return', True),
        ('downside_risk', False),
        ('max_drawdown', False),
        ('sortino_ratio', True),
    )
    for metric, rank_ascending in rank_directions:
        within_type = df.groupby(['invest_type'])[metric]
        df[metric + '_rank'] = within_type.rank(ascending=rank_ascending, pct=True)
    return df
def _fund_metrics(net_worth, n):
    """Compute the per-fund metric values, keyed by their output column name."""
    sim_return = simple_return(net_worth)
    ex_return = excess_return(sim_return, bank_rate=0.015, n=n)
    rng_return = float(range_return(net_worth.values[-1], net_worth.values[0]))
    down_risk = downside_risk(sim_return, bank_rate=0.015, n=n)
    return {
        'range_return': rng_return,
        'annual_return': annual_return(rng_return, net_worth, n),
        # Pass the raw values so the drawdown lookup is purely positional and
        # does not depend on the date index.
        'max_drawdown': float(max_drawdown(net_worth.values)[0]),
        'sharp_ratio': sharpe_ratio(ex_return, sim_return, n),
        'volatility': volatility(sim_return, n),
        'sortino_ratio': sortino_ratio(ex_return, down_risk, n),
        'downside_risk': down_risk,
    }


def public_fund_rank(start_date, end_date):
    """Compute performance metrics for every listed fund and rank them.

    For each fund returned by ``get_fund_info`` the NAV history since
    ``start_date`` is loaded, resampled onto the trading calendar and turned
    into a row of metrics. Funds are then percentile-ranked within their fund
    type and given a combined ``z_score``.

    Funds are skipped (and logged) when no NAV data is found, when their
    history spans less than 60% of the requested window, when the sampling
    frequency cannot be inferred, or when fewer than two observations remain
    after resampling.

    Args:
        start_date: Start of the evaluation window (datetime).
        end_date: End of the evaluation window (datetime).

    Returns:
        DataFrame indexed by ``ts_code`` with the metric columns, the
        ``*_rank`` columns and ``z_score``.
    """
    fund_info = get_fund_info(end_date)
    grouped_fund = fund_info.groupby('fund_type')['ts_code'].unique()
    trading_cal = get_trade_cal(start_date, end_date, method='mysql')
    columns = ['ts_code', 'range_return', 'annual_return', 'max_drawdown',
               'sharp_ratio', 'volatility', 'sortino_ratio', 'downside_risk',
               'invest_type', 'manager', 'management']
    min_span = 0.6 * (end_date - start_date)
    rows = []
    skipped_funds = []

    for invest_type in grouped_fund.index:
        for fund in grouped_fund[invest_type]:
            df = get_dataframe(fund, start_date)
            if df is None or df.empty:
                logging.log(logging.INFO, 'Skipped {}'.format(fund))
                skipped_funds.append(fund)
                continue
            # A fund covering too little of the window is not comparable.
            # It used to be recorded as skipped but was still processed.
            if df.index[-1] - df.index[0] < min_span:
                logging.log(logging.INFO, 'Skipped {}'.format(fund))
                skipped_funds.append(fund)
                continue
            try:
                n = get_frequency(df)
            except Exception as e:
                logging.log(logging.ERROR, repr(e))
                logging.log(logging.INFO, 'Skipped {}'.format(fund))
                skipped_funds.append(fund)
                continue

            df = resample(df, trading_cal, n)
            if len(df) < 2:
                # Nothing left to measure after calendar alignment.
                logging.log(logging.INFO, 'Skipped {}'.format(fund))
                skipped_funds.append(fund)
                continue
            try:
                # Called only for its logging of the resampled gap
                # distribution; a failure must not abort the whole run.
                get_frequency(df)
            except Exception as e:
                logging.log(logging.ERROR, repr(e))

            logging.log(logging.INFO, "Dealing with {}".format(fund))
            row = {'ts_code': fund, 'invest_type': invest_type}
            row.update(_fund_metrics(df['adj_nav'].astype(float), n))
            fund_rows = fund_info[fund_info['ts_code'] == fund]
            row['manager'] = fund_rows['name'].values
            row['management'] = fund_rows['management'].values
            rows.append(row)

    logging.log(logging.INFO, 'Skipped {} fund(s) in total'.format(len(skipped_funds)))

    # Build the frame once; DataFrame.append was quadratic and no longer
    # exists in pandas 2.x.
    metric_df = pd.DataFrame(rows, columns=columns)
    metric_df.set_index('ts_code', inplace=True)
    df = metric_rank(metric_df)
    if 'sharp_ratio_rank' not in df.columns:
        # z_score needs a Sharpe-ratio rank; compute it here when the ranking
        # helper did not provide that column.
        df['sharp_ratio_rank'] = df.groupby(['invest_type'])['sharp_ratio'].rank(
            ascending=True, pct=True)
    df['z_score'] = z_score(df['annual_return_rank'],
                            df['downside_risk_rank'],
                            df['max_drawdown_rank'],
                            df['sharp_ratio_rank'])
    return df
if __name__ == '__main__':
    # Rank funds over the one-year window ending yesterday and export the
    # result as a GBK-encoded CSV file.
    end_date = datetime.datetime.now() - datetime.timedelta(days=1)
    start_date = cal_date(end_date, 'Y', 1)
    # Use a separate name for the result so the function itself is not
    # rebound (and made uncallable) at module level.
    rank_df = public_fund_rank(start_date, end_date)
    rank_df.to_csv('public_fund_rank.csv', encoding='gbk')
# coding: utf-8
"""
计算各个指标的方法
"""
import pandas as pd
import numpy as np
import datetime
import calendar
import math
def simple_return(net_worth):
    """Simple period-over-period return.

    Args:
        net_worth: pandas Series of NAV or index levels.

    Returns:
        Series of ``value / previous value - 1`` with the first element set
        to 0.
    """
    previous = net_worth.shift(1)
    returns = net_worth / previous - 1
    returns.iloc[0] = 0
    return returns
def excess_return(returns, bank_rate, n):
    """Mean excess return per period.

    Args:
        returns: Simple returns (must provide ``mean()``).
        bank_rate: Annual benchmark rate (e.g. the risk-free rate).
        n: Periods per year: 250 (daily), 52 (weekly), 12 (monthly).

    Returns:
        ``returns.mean()`` minus the per-period benchmark ``bank_rate / n``.
    """
    per_period_rate = bank_rate / n
    return returns.mean() - per_period_rate
def sharpe_ratio(excess_return, simple_return, n):
    """Annualised Sharpe ratio.

    Args:
        excess_return: Excess return(s); its ``mean()`` is used.
        simple_return: Simple returns; the sample standard deviation
            (``ddof=1``) is used.
        n: Periods per year: 250 (daily), 52 (weekly), 12 (monthly).

    Returns:
        ``sqrt(n) * mean(excess_return) / std(simple_return)``.
    """
    annualizer = math.sqrt(n)
    dispersion = simple_return.std(ddof=1)
    return annualizer * excess_return.mean() / dispersion
def volatility(simple_return, n):
    """Annualised volatility.

    Args:
        simple_return: Simple returns.
        n: Periods per year: 250 (daily), 52 (weekly), 12 (monthly).

    Returns:
        ``sqrt(n)`` times the sample standard deviation (``ddof=1``).
    """
    annualizer = math.sqrt(n)
    return annualizer * simple_return.std(ddof=1)
def IR(excess_return, n):
    """Annualised ratio of mean to sample standard deviation of excess returns.

    Args:
        excess_return: Returns minus the benchmark return.
        n: Periods per year used for annualisation.
    """
    average = excess_return.mean()
    dispersion = excess_return.std(ddof=1)
    return math.sqrt(n) * average / dispersion
def max_drawdown(return_list):
    """Maximum drawdown of a NAV or index level series.

    Args:
        return_list: Sequence of levels (list, NumPy array or pandas Series).

    Returns:
        Tuple ``(drawdown, start, end)``: the largest relative decline from a
        running peak, the position of that peak and the position of the
        trough. ``(0, 0, 0)`` when the input is empty or never declines.
    """
    # Work on a plain float array so every lookup below is positional. With a
    # pandas Series, ``series[j]`` is a label lookup and breaks for a date
    # index or a non-default integer index.
    values = np.asarray(return_list, dtype=float)
    if values.size == 0:
        return 0, 0, 0
    running_peak = np.maximum.accumulate(values)
    end = np.argmax((running_peak - values) / running_peak)  # trough position
    if end == 0:
        return 0, 0, 0  # no drawdown
    start = np.argmax(values[:end])  # peak position before the trough
    return (values[start] - values[end]) / values[start], start, end
def month_differ(x, y):
    """Absolute number of calendar months between two dates.

    Only ``year`` and ``month`` are considered; the day is ignored.

    Args:
        x: Object with ``year`` and ``month`` attributes (e.g. datetime).
        y: Object with ``year`` and ``month`` attributes.

    Returns:
        Non-negative month difference.
    """
    year_gap = x.year - y.year
    month_gap = x.month - y.month
    return abs(year_gap * 12 + month_gap * 1)
def downside_risk(r, bank_rate, n):
    """Downside risk (sample downside deviation) of simple returns.

    Only shortfalls below the per-period benchmark ``bank_rate / n`` count;
    returns above it contribute zero.

    Args:
        r: pandas Series of simple returns.
        bank_rate: Annual benchmark rate.
        n: Periods per year used to scale ``bank_rate``.

    Returns:
        Square root of the sum of squared shortfalls divided by
        ``len(r) - 1``; ``nan`` when fewer than two observations are given,
        because the sample deviation is undefined there.
    """
    count = len(r)
    if count < 2:
        # Avoids the division by zero of the (count - 1) correction.
        return float('nan')
    # The sign of the shortfall is irrelevant because it is squared below.
    shortfall = r.map(lambda x: min(x - bank_rate / n, 0))
    return np.sqrt((shortfall ** 2).mean() * count / (count - 1))
def sortino_ratio(excess_return, downside, n):
    """Annualised Sortino ratio.

    Args:
        excess_return: Excess return(s); its ``mean()`` is used.
        downside: Downside risk used as the denominator.
        n: Periods per year used for annualisation.

    Returns:
        ``sqrt(n) * mean(excess_return) / downside``.
    """
    annualizer = math.sqrt(n)
    return annualizer * excess_return.mean() / downside
def month_minus(date, n):
    """Return the date ``n`` months before ``date``.

    For example, three months before March 2020 is December 2019. When the
    target month is shorter than ``date.day`` (e.g. March 31 minus one month)
    the day is clamped to the last day of the target month.

    Args:
        date: Reference ``datetime.datetime``.
        n: Number of months to go back; may exceed 12.

    Returns:
        ``datetime.datetime`` at midnight of the resulting day.
    """
    # Absolute month arithmetic handles any n, including spans of more than
    # one year, which the previous single-year wrap-around got wrong.
    year, month_index = divmod(date.year * 12 + (date.month - 1) - n, 12)
    month = month_index + 1
    # Clamp explicitly rather than relying on a bare ``except`` around the
    # constructor, which also hid unrelated errors.
    day = min(date.day, calendar.monthrange(year, month)[1])
    return datetime.datetime(year, month, day)
def is_exsits(a, b):
    """Tell whether ``a`` is strictly earlier than ``b``.

    Args:
        a: Fund inception date.
        b: Start date of the period for which returns are computed.

    Returns:
        ``True`` when ``a < b``, otherwise ``False``.
    """
    return bool(a < b)
def year_minus(date, n):
    """Return the date ``n`` years before ``date``.

    For example, one year before March 2020 is March 2019. February 29 is
    clamped to February 28 when the target year is not a leap year.

    Args:
        date: Reference ``datetime.datetime``.
        n: Number of years to go back.

    Returns:
        ``datetime.datetime`` at midnight of the resulting day.
    """
    year = date.year - n
    # Clamp the day so Feb 29 does not raise ValueError in non-leap years.
    day = min(date.day, calendar.monthrange(year, date.month)[1])
    return datetime.datetime(year, date.month, day)
def range_return(end_df, begin_df):
    """Return over an interval: ending level divided by starting level, minus 1."""
    growth = end_df / begin_df
    return growth - 1
def annual_return(range_return, df, n):
    """Annualised return.

    Args:
        range_return: Return over the whole interval.
        df: The observations of the interval; only ``len(df)`` is used.
        n: Periods per year.

    Returns:
        ``(1 + range_return) ** (n / len(df)) - 1``.
    """
    exponent = n / len(df)
    growth = 1 + range_return
    return growth ** exponent - 1
def gain_loss_ratio(simple_return):
    """Gain/loss ratio: sum of non-negative returns over the magnitude of the sum of negative returns."""
    gains = simple_return[simple_return >= 0].sum()
    losses = simple_return[simple_return < 0].sum()
    return -gains / losses
def alpha_beta(simple_return, b_simple_return, n):
    """Estimate alpha and beta by regressing returns on benchmark returns.

    An ordinary least-squares line ``returns = intercept + beta * benchmark``
    is fitted with NumPy, so no scikit-learn import is needed for this
    one-variable regression.

    Args:
        simple_return: Returns of the fund.
        b_simple_return: Returns of the benchmark.
        n: Periods per year used to annualise the intercept.

    Returns:
        Tuple ``(alpha, beta)`` where ``alpha`` is the intercept times ``n``
        and ``beta`` the slope. ``beta`` is 0 when the benchmark returns in
        the sample are constant.

    Raises:
        ValueError: If no complete observation pair is available.
    """
    frame = pd.DataFrame()
    frame['returns'] = simple_return
    frame['b_returns'] = b_simple_return
    # NOTE(review): as before, the final observation is left out of the fit --
    # confirm this is intended.
    sample = frame.iloc[:len(simple_return) - 1].dropna()
    if sample.empty:
        raise ValueError("no complete return pairs to regress on")

    x = sample['b_returns'].to_numpy(dtype=float)
    y = sample['returns'].to_numpy(dtype=float)
    x_dev = x - x.mean()
    spread = float(np.dot(x_dev, x_dev))
    # Closed-form OLS slope; a constant regressor gets slope 0.
    beta = float(np.dot(x_dev, y - y.mean()) / spread) if spread > 0 else 0.0
    intercept = float(y.mean() - beta * x.mean())
    alpha = intercept * n
    return alpha, beta
def win_rate(simple_return, b_simple_return):
    """Win rate: share of periods in which the return is at least the benchmark return."""
    frame = pd.DataFrame()
    frame['diff'] = simple_return - b_simple_return
    gaps = frame['diff']
    wins = gaps[gaps >= 0].count()
    return wins / gaps.count()
def lpm(returns, threshold, order):
    """Lower partial moment of the returns.

    Args:
        returns: Array of returns.
        threshold: Target (expected) return.
        order: Order of the moment, e.g. 1 or 2.

    Returns:
        Mean over all observations of ``max(threshold - return, 0) ** order``.
    """
    count = len(returns)
    # Constant array holding the threshold, one entry per observation.
    targets = np.full(count, threshold, dtype=float)
    # Shortfall below the threshold; observations above it contribute 0.
    shortfall = (targets - returns).clip(min=0)
    return np.sum(shortfall ** order) / count
def var(returns, alpha):
    """Historical-simulation value at risk.

    The returns are sorted ascending and the element at position
    ``int(alpha * len(returns))`` is taken.

    Args:
        returns: Sequence of historical returns.
        alpha: Quantile level in ``[0, 1]``.

    Returns:
        Absolute value of the selected return (VaR is reported as positive).

    Raises:
        ValueError: If ``returns`` is empty.
    """
    sorted_returns = np.sort(returns)
    size = len(sorted_returns)
    if size == 0:
        raise ValueError("returns must not be empty")
    # Clamp so alpha == 1 selects the last element instead of running one
    # past the end of the array.
    index = min(int(alpha * size), size - 1)
    return abs(sorted_returns[index])
def cvar(returns, alpha):
    """Historical-simulation conditional value at risk.

    Averages the ``int(alpha * len(returns))`` smallest returns; at least one
    and at most all observations are used.

    Args:
        returns: Sequence of historical returns.
        alpha: Tail fraction in ``[0, 1]``.

    Returns:
        Absolute value of the tail average (CVaR is reported as positive).

    Raises:
        ValueError: If ``returns`` is empty.
    """
    sorted_returns = np.sort(returns)
    size = len(sorted_returns)
    if size == 0:
        raise ValueError("returns must not be empty")
    # A tail of zero elements used to divide by zero; fall back to the single
    # worst return. Also never read past the end of the array.
    tail_size = min(max(int(alpha * size), 1), size)
    return abs(sorted_returns[:tail_size].mean())
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment