Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Sign in
Toggle navigation
F
fund_report
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
彭熊
fund_report
Commits
c55c7355
Commit
c55c7355
authored
Nov 19, 2020
by
李宗熹
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Add fund rank
parent
68e3f7f9
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
484 additions
and
0 deletions
+484
-0
fund_rank.py
app/utils/fund_rank.py
+225
-0
week_evaluation.py
app/utils/week_evaluation.py
+259
-0
No files found.
app/utils/fund_rank.py
0 → 100644
View file @
c55c7355
import
pymysql
import
tushare
as
ts
import
logging
logging
.
basicConfig
(
level
=
logging
.
DEBUG
)
from
week_evaluation
import
*
# Module-level MySQL connection shared by every query helper in this file.
# It is opened as a side effect of importing the module.
# SECURITY NOTE(review): host, user and password are hard-coded and committed
# to version control; rotate them and load them from the environment or a
# secrets store instead.
con = pymysql.connect(
    host='tamper.mysql.polardb.rds.aliyuncs.com',
    user='tamp_fund',
    password='@imeng408',
    database='tamp_fund',
    charset='utf8',
    # A real boolean; the previous string 'True' was only truthy by accident.
    use_unicode=True,
)
def get_dataframe(fund, start_date, rollback=False):
    """Load the adjusted-NAV history of one fund from ``public_fund_nav``.

    Args:
        fund: Fund code matched against the ``ts_code`` column.
        start_date: Earliest ``end_date`` to keep (datetime-like).
        rollback: When True, ``start_date`` is first moved back to the latest
            NAV date on or before it. If the fund has no such date,
            ``start_date`` is left unchanged.

    Returns:
        DataFrame indexed by ``end_date`` (ascending, one row per date) with
        the ``ts_code`` and ``adj_nav`` columns, or ``None`` when the query
        yields no complete rows for the fund.
    """
    # Bind the fund code as a query parameter instead of formatting it into
    # the SQL text.
    sql = "SELECT ts_code, end_date, adj_nav FROM public_fund_nav " \
          "WHERE ts_code=%s"
    df = pd.read_sql(sql, con, params=[fund]).dropna(how='any')
    if df.empty:
        logging.error("CAN NOT FIND %s", fund)
        return None

    df['end_date'] = pd.to_datetime(df['end_date'])
    if rollback:
        # Pick the latest NAV date not after start_date directly. Stepping
        # back one day at a time never terminates when no such date exists or
        # when start_date carries a time-of-day component.
        on_or_before = df.loc[df['end_date'] <= start_date, 'end_date']
        if not on_or_before.empty:
            start_date = on_or_before.max()

    return (
        df[df['end_date'] >= start_date]
        .drop_duplicates(subset='end_date', keep='first')
        .set_index('end_date')
        .sort_index(ascending=True)
    )
def get_frequency(df):
    """Infer the number of observations per year from a date-indexed frame.

    The most common gap, in whole days, between consecutive index entries is
    mapped to a yearly period count. A summary of the gaps is logged at INFO
    level.

    Args:
        df: DataFrame whose index holds the observation dates.

    Returns:
        250 for a typical gap of 0-2 days, 52 for 6-8 days, 24 for 13-17
        days, 12 for 28-32 days and 3 for 110-132 days.

    Raises:
        ValueError: If the typical gap falls in none of those ranges.
    """
    dates = df.index.to_series()
    gaps = dates - dates.shift(1)
    logging.log(logging.INFO, gaps.describe())

    typical_gap = gaps.mode()[0].days
    periods_by_gap = (
        (range(0, 3), 250),
        (range(6, 9), 52),
        (range(13, 18), 24),
        (range(28, 33), 12),
        (range(110, 133), 3),
    )
    for day_range, periods_per_year in periods_by_gap:
        if typical_gap in day_range:
            return periods_per_year
    raise ValueError
def get_trade_cal(start_date, end_date, method):
    """Fetch the calendar of open trading days.

    Args:
        start_date: First calendar date requested. Only used by the
            'tushare' method.
        end_date: Last calendar date requested, or ``None`` for no upper
            bound. Only used by the 'tushare' method.
        method: 'mysql' to read every open day from the ``stock_trade_cal``
            table, or 'tushare' to query the tushare ``trade_cal`` API for
            the SSE exchange.

    Returns:
        DataFrame of trading days. For 'mysql' it has the columns
        ``cal_date`` and ``end_date`` (parsed datetimes) and is indexed by
        ``end_date``. For 'tushare' it is the API result with ``exchange``
        and ``is_open`` dropped and ``cal_date`` renamed to ``end_date``.

    Raises:
        ValueError: If ``method`` is neither 'mysql' nor 'tushare'.
    """
    if method == 'mysql':
        sql = 'SELECT cal_date FROM stock_trade_cal WHERE is_open=1'
        df = pd.read_sql(sql, con)
        df['end_date'] = pd.to_datetime(df['cal_date'])
        # Keep end_date as a column too: callers look dates up in the index
        # and also select the column by name.
        df.set_index('end_date', drop=False, inplace=True)
    elif method == 'tushare':
        # SECURITY NOTE(review): the API token is hard-coded and committed to
        # version control; it should be rotated and loaded from configuration.
        ts.set_token('ac1f734f8a25651aa07319ca35b1b0c0854e361e306fe85d85e092bc')
        pro = ts.pro_api()
        query = {'exchange': 'SSE', 'start_date': start_date, 'is_open': '1'}
        if end_date is not None:
            query['end_date'] = end_date
        df = pro.trade_cal(**query)
        df.drop(['exchange', 'is_open'], axis=1, inplace=True)
        df.rename(columns={'cal_date': 'end_date'}, inplace=True)
    else:
        # Fail with a clear message instead of an UnboundLocalError on the
        # return statement below.
        raise ValueError(
            "unknown method {!r}; expected 'mysql' or 'tushare'".format(method)
        )
    return df
def get_manager():
    """Read fund-manager names from ``public_fund_manager``.

    Only rows whose ``end_date`` is NULL are selected.

    Returns:
        DataFrame with the columns ``ts_code`` and ``name``.
    """
    query = 'SELECT ts_code, name FROM public_fund_manager WHERE end_date IS NULL'
    return pd.read_sql(query, con)
def get_fund_info(end_date):
    """Read basic fund information and attach manager names.

    Selects funds from ``public_fund_basic`` that have no ``delist_date`` and
    whose ``due_date`` is NULL or later than ``end_date``, then left-joins
    the result of ``get_manager()`` on ``ts_code``.

    Args:
        end_date: Date-like object with ``strftime``; it is compared with
            ``due_date`` in ``YYYYMMDD`` form.

    Returns:
        DataFrame with the columns ``ts_code``, ``fund_type``,
        ``management`` and ``name``.
    """
    cutoff = end_date.strftime('%Y%m%d')
    sql = "SELECT ts_code, fund_type, management FROM public_fund_basic " \
          "WHERE delist_date IS NULL AND (due_date IS NULL OR due_date>'{}')".format(cutoff)
    # Drop only rows in which every selected column is missing.
    funds = pd.read_sql(sql, con).dropna(how='all')
    managers = get_manager()
    return funds.merge(managers, how="left", on='ts_code')
def resample(df, trading_cal, freq):
    """Resample a fund NAV table and align the sampled dates to trading days.

    Args:
        df (DataFrame): Raw fund NAV table indexed by date.
        trading_cal (DataFrame): Trading calendar with an ``end_date``
            column. Dates are looked up in its index, and it is joined on its
            index.
        freq (int): Periods per year selecting the resampling rule:
            250 -> business day ('B'), 52 -> week ending Friday ('W-FRI'),
            24 -> semi-month ('SM'), 12 -> month ('M'), 3 -> quarter ('Q').

    Returns:
        DataFrame: Inner join, on the index, of the resampled NAV table and
        ``trading_cal``. Sampled dates that could not be moved onto a trading
        day are therefore dropped.
    """
    rule_by_freq = {250: 'B', 52: 'W-FRI', 12: 'M', 24: 'SM', 3: 'Q'}
    rule = rule_by_freq[freq]
    # Resample on the chosen rule and forward-fill the NAV values.
    sampled = df.resample(rule=rule).ffill()

    # Largest backward shift (exclusive) tried per frequency, so a shifted
    # date stays within the same week, month, quarter, etc.
    max_shift_by_freq = {250: 1, 52: 5, 12: 30, 24: 15, 3: 120}
    max_shift = max_shift_by_freq[freq]

    def _snap_to_trading_day(day):
        # Membership on a Series tests its index, so these lookups check the
        # calendar's index rather than the column values.
        if day in trading_cal['end_date']:
            return day
        for days_back in range(1, max_shift):
            candidate = day - datetime.timedelta(days=days_back)
            if candidate in trading_cal['end_date']:
                # The first earlier trading day found wins.
                return candidate
        # No trading day within reach: keep the date; the inner join below
        # removes it.
        return day

    # Replace the index with the sampled dates moved onto trading days.
    sampled.index = pd.Series([_snap_to_trading_day(day) for day in sampled.index])
    return sampled.merge(trading_cal, how='inner', left_index=True, right_index=True)
def z_score(annual_return_rank, downside_risk_rank, max_drawdown_rank, sharp_ratio_rank):
    """Combine four rank values into one equally weighted score.

    Each rank is multiplied by 25 and the four products are added, so ranks
    in the range 0..1 give a score in the range 0..100.

    Args:
        annual_return_rank: Rank of the annualised return.
        downside_risk_rank: Rank of the downside risk.
        max_drawdown_rank: Rank of the maximum drawdown.
        sharp_ratio_rank: Rank of the Sharpe ratio.

    Returns:
        The weighted sum (scalar or element-wise, depending on the inputs).
    """
    weight = 25
    score = weight * annual_return_rank
    score = score + weight * downside_risk_rank
    score = score + weight * max_drawdown_rank
    score = score + weight * sharp_ratio_rank
    return score
def cal_date(date, period_type, period):
    """Return the date lying ``period`` years, months or days before ``date``.

    Args:
        date: ``datetime.datetime`` reference point.
        period_type: 'Y' for years, 'm' for months, 'd' for days.
        period: Number of units to go back.

    Returns:
        For 'Y' and 'm': a ``datetime.datetime`` at midnight in the target
        month, with the day clamped to that month's last day (so Feb 29
        minus one year gives Feb 28, and Mar 31 minus one month gives the
        last day of February).
        For 'd': ``date - timedelta(days=period)``, time of day preserved.
        For any other ``period_type``: ``None``.
    """
    import calendar

    if period_type == 'd':
        return date - datetime.timedelta(days=period)
    if period_type == 'Y':
        months_back = 12 * period
    elif period_type == 'm':
        months_back = period
    else:
        return None

    # Work on a zero-based month count so any number of months, including
    # spans longer than a year, rolls over the year boundary correctly.
    year, month_index = divmod(date.year * 12 + (date.month - 1) - months_back, 12)
    month = month_index + 1
    # Clamp the day so the result always exists in the target month.
    day = min(date.day, calendar.monthrange(year, month)[1])
    return datetime.datetime(year, month, day)
def metric_rank(df):
    """Add percentile-rank columns for four metrics, ranked per invest type.

    For each of ``annual_return``, ``downside_risk``, ``max_drawdown`` and
    ``sortino_ratio`` a ``<metric>_rank`` column is added, holding the
    percentile rank of the metric within its ``invest_type`` group.
    ``downside_risk`` and ``max_drawdown`` are ranked in descending order, so
    smaller values receive higher percentiles; the other two are ranked in
    ascending order.

    Args:
        df: DataFrame with an ``invest_type`` column and the four metric
            columns. It is modified in place.

    Returns:
        The same DataFrame object with the rank columns added.
    """
    ranked_descending = ('downside_risk', 'max_drawdown')
    for metric in ('annual_return', 'downside_risk', 'max_drawdown', 'sortino_ratio'):
        ascending = metric not in ranked_descending
        per_type = df.groupby(['invest_type'])[metric]
        df['{}_rank'.format(metric)] = per_type.rank(ascending=ascending, pct=True)
    return df
def public_fund_rank(start_date, end_date):
    """Compute performance metrics for every active public fund and rank them.

    Funds come from ``get_fund_info(end_date)`` and are grouped by
    ``fund_type``. For each fund the NAV history from ``start_date`` is
    loaded, its sampling frequency inferred, the series resampled onto the
    trading calendar, and the return and risk metrics computed. Funds whose
    NAV history cannot be loaded or whose frequency cannot be inferred are
    logged and skipped.

    Args:
        start_date: Start of the evaluation window (datetime).
        end_date: End of the evaluation window (datetime).

    Returns:
        DataFrame indexed by ``ts_code`` with the metric columns, the
        ``invest_type``, ``manager`` and ``management`` columns, the
        per-type percentile ranks, and the combined ``z_score``.
    """
    fund_info = get_fund_info(end_date)
    grouped_fund = fund_info.groupby('fund_type')['ts_code'].unique()
    trading_cal = get_trade_cal(start_date, end_date, method='mysql')

    columns = ['ts_code', 'range_return', 'annual_return', 'max_drawdown',
               'sharp_ratio', 'volatility', 'sortino_ratio', 'downside_risk',
               'invest_type', 'manager', 'management']
    # Rows are collected in a list and turned into a frame once at the end:
    # DataFrame.append was removed in pandas 2.0 and copied the whole frame
    # on every call.
    rows = []
    # NOTE(review): funds with a short history are recorded here but are
    # still processed below; confirm whether they were meant to be excluded.
    skipped_funds = []

    for invest_type in grouped_fund.index:
        for fund in grouped_fund[invest_type]:
            df = get_dataframe(fund, start_date)
            if df is None:
                # get_dataframe has already logged that no NAV data exists.
                logging.log(logging.INFO, 'Skipped {}'.format(fund))
                continue
            try:
                if df.index[-1] - df.index[0] < 0.6 * (end_date - start_date):
                    skipped_funds.append(fund)
                n = get_frequency(df)
            except Exception as e:
                logging.log(logging.ERROR, repr(e))
                logging.log(logging.INFO, 'Skipped {}'.format(fund))
                continue

            df = resample(df, trading_cal, n)
            # Called for its logging and validation only; the value is unused.
            _ = get_frequency(df)
            logging.log(logging.INFO, "Dealing with {}".format(fund))

            net_worth = df['adj_nav'].astype(float)
            end_df, begin_df = net_worth.values[-1], net_worth.values[0]
            sim_return = simple_return(net_worth)
            ex_return = excess_return(sim_return, bank_rate=0.015, n=n)
            drawdown = float(max_drawdown(net_worth)[0])
            shp_ratio = sharpe_ratio(ex_return, sim_return, n)
            rng_return = float(range_return(end_df, begin_df))
            ann_return = annual_return(rng_return, net_worth, n)
            vol = volatility(sim_return, n)
            down_risk = downside_risk(sim_return, bank_rate=0.015, n=n)
            sor_ratio = sortino_ratio(ex_return, down_risk, n)

            fund_rows = fund_info[fund_info['ts_code'] == fund]
            rows.append({
                'ts_code': fund,
                'range_return': rng_return,
                'annual_return': ann_return,
                'max_drawdown': drawdown,
                'sharp_ratio': shp_ratio,
                'volatility': vol,
                'sortino_ratio': sor_ratio,
                'downside_risk': down_risk,
                'invest_type': invest_type,
                'manager': fund_rows['name'].values,
                'management': fund_rows['management'].values,
            })

    metric_df = pd.DataFrame(rows, columns=columns)
    metric_df.set_index('ts_code', inplace=True)
    df = metric_rank(metric_df)
    # metric_rank ranks sortino_ratio but not sharp_ratio, so the column read
    # below would be missing. Rank it here with the same per-type percentile
    # method when it is absent.
    # NOTE(review): confirm whether the score should use the Sharpe or the
    # Sortino rank.
    if 'sharp_ratio_rank' not in df.columns:
        df['sharp_ratio_rank'] = df.groupby(['invest_type'])['sharp_ratio'].rank(
            ascending=True, pct=True)
    df['z_score'] = z_score(df['annual_return_rank'],
                            df['downside_risk_rank'],
                            df['max_drawdown_rank'],
                            df['sharp_ratio_rank'])
    return df
if __name__ == '__main__':
    # Rank funds over the one-year window that ends yesterday.
    end_date = datetime.datetime.now() - datetime.timedelta(days=1)
    start_date = cal_date(end_date, 'Y', 1)
    # Store the result under its own name so the public_fund_rank function is
    # not replaced by its return value.
    rank_table = public_fund_rank(start_date, end_date)
    rank_table.to_csv('public_fund_rank.csv', encoding='gbk')
app/utils/week_evaluation.py
0 → 100644
View file @
c55c7355
# coding: utf-8
"""
Functions for computing the individual fund evaluation metrics.
"""
import
pandas
as
pd
import
numpy
as
np
import
datetime
import
calendar
import
math
def simple_return(net_worth):
    """Compute period-over-period simple returns.

    Args:
        net_worth: pandas Series of NAV or index levels.

    Returns:
        Series of ``value / previous value - 1`` with the first entry set
        to 0.
    """
    previous = net_worth.shift(1)
    returns = net_worth.div(previous).sub(1)
    # The first observation has no predecessor; its return is defined as 0.
    returns.iloc[0] = 0
    return returns
def excess_return(returns, bank_rate, n):
    """Compute the mean per-period return in excess of a benchmark rate.

    Args:
        returns: Simple returns.
        bank_rate: Risk-free rate (or another benchmark rate); it is divided
            by ``n`` to obtain the per-period rate.
        n: Periods per year: 52 for weekly, 12 for monthly, 250 for daily
            data.

    Returns:
        ``returns.mean() - bank_rate / n``.
    """
    average_return = returns.mean()
    per_period_benchmark = bank_rate / n
    return average_return - per_period_benchmark
def sharpe_ratio(excess_return, simple_return, n):
    """Compute the annualised Sharpe ratio.

    Args:
        excess_return: Excess returns; only their ``mean()`` is used.
        simple_return: Simple returns; their sample standard deviation
            (``ddof=1``) is the denominator.
        n: Periods per year: 52 for weekly, 12 for monthly, 250 for daily
            data.

    Returns:
        ``sqrt(n) * excess_return.mean() / simple_return.std(ddof=1)``.
    """
    from math import sqrt

    annualiser = sqrt(n)
    mean_excess = excess_return.mean()
    dispersion = simple_return.std(ddof=1)
    return annualiser * mean_excess / dispersion
def volatility(simple_return, n):
    """Compute the annualised volatility.

    :param simple_return: simple returns
    :param n: periods per year: 52 for weekly, 12 for monthly, 250 for daily
    :return: ``sqrt(n)`` times the sample standard deviation (``ddof=1``)
    """
    per_period_std = simple_return.std(ddof=1)
    annualiser = math.sqrt(n)
    return annualiser * per_period_std
def IR(excess_return, n):
    """Compute the annualised information ratio.

    Args:
        excess_return: Returns minus the benchmark returns.
        n: Periods per year.

    Returns:
        ``sqrt(n) * excess_return.mean() / excess_return.std(ddof=1)``.
    """
    annualiser = math.sqrt(n)
    mean_excess = excess_return.mean()
    tracking_error = excess_return.std(ddof=1)
    return annualiser * mean_excess / tracking_error
def max_drawdown(return_list):
    """Find the largest peak-to-trough decline of a NAV or index series.

    Args:
        return_list: NAV or index levels in time order (list, ndarray or
            pandas Series).

    Returns:
        Tuple ``(drawdown, start, end)``: the fraction lost from peak to
        trough, the position of the peak and the position of the trough.
        ``(0, 0, 0)`` is returned when there is no drawdown or the input is
        empty.
    """
    # Work on a plain array so integer positions are never interpreted as
    # index labels of a date-indexed Series.
    values = np.asarray(return_list, dtype=float)
    if values.size == 0:
        return 0, 0, 0

    running_peak = np.maximum.accumulate(values)
    trough = int(np.argmax((running_peak - values) / running_peak))  # end position
    if trough == 0:
        return 0, 0, 0  # no drawdown

    peak = int(np.argmax(values[:trough]))  # start position
    return (values[peak] - values[trough]) / values[peak], peak, trough
def month_differ(x, y):
    """Return the absolute number of calendar months between two dates.

    Only the year and month are compared; the day is ignored.

    :param x: datetime.datetime
    :param y: datetime.datetime
    :return: non-negative difference in months
    """
    years_apart = x.year - y.year
    months_apart = x.month - y.month
    return abs(years_apart * 12 + months_apart)
def downside_risk(r, bank_rate, n):
    """Compute the downside deviation of returns below a per-period hurdle.

    Args:
        r: pandas Series of simple returns.
        bank_rate: Benchmark rate; ``bank_rate / n`` is the per-period
            hurdle.
        n: Periods per year.

    Returns:
        Square root of the mean squared shortfall below the hurdle, rescaled
        by ``len(r) / (len(r) - 1)``. No ``sqrt(n)`` annualisation factor is
        applied here.
    """
    # Shortfall against the hurdle; returns at or above the hurdle count as 0.
    shortfall = np.minimum(r - bank_rate / n, 0)
    count = len(shortfall)
    return np.sqrt((shortfall ** 2).mean() * count / (count - 1))
def sortino_ratio(excess_return, downside, n):
    """Compute the annualised Sortino ratio.

    Args:
        excess_return: Excess returns; only their ``mean()`` is used.
        downside: Downside risk used as the denominator.
        n: Periods per year.

    Returns:
        ``sqrt(n) * excess_return.mean() / downside``.
    """
    from math import sqrt

    annualiser = sqrt(n)
    mean_excess = excess_return.mean()
    return annualiser * mean_excess / downside
def month_minus(date, n):
    """Return the date ``n`` calendar months before ``date``.

    For example, three months before March 2020 is December 2019. When the
    target month is shorter than ``date.day`` (e.g. going back from Mar 31
    to February), the last day of the target month is used.

    Args:
        date: ``datetime.datetime`` reference point.
        n: Number of months to go back; may exceed 12.

    Returns:
        ``datetime.datetime`` at midnight in the target month.
    """
    # A zero-based month count lets any n roll over year boundaries.
    year, month_index = divmod(date.year * 12 + (date.month - 1) - n, 12)
    month = month_index + 1
    # Clamp the day instead of relying on a ValueError from the constructor.
    day = min(date.day, calendar.monthrange(year, month)[1])
    return datetime.datetime(year, month, day)
def is_exsits(a, b):
    """Tell whether the fund's first date lies before a requested start date.

    :param a: first date of the fund
    :param b: start date of the period to evaluate
    :return: True when ``a < b``, otherwise False
    """
    return True if a < b else False
def year_minus(date, n):
    """Return the date ``n`` years before ``date``.

    For example, one year before March 2020 is March 2019. Feb 29 maps to
    Feb 28 when the target year is not a leap year.

    Args:
        date: ``datetime.datetime`` reference point.
        n: Number of years to go back.

    Returns:
        ``datetime.datetime`` at midnight in the target year.
    """
    year = date.year - n
    month = date.month
    # Clamp the day so a leap day does not raise in a non-leap target year.
    day = min(date.day, calendar.monthrange(year, month)[1])
    return datetime.datetime(year, month, day)
def range_return(end_df, begin_df):
    """Compute the return over an interval.

    Args:
        end_df: Value at the end of the interval.
        begin_df: Value at the start of the interval.

    Returns:
        ``end_df / begin_df - 1``.
    """
    growth = end_df / begin_df
    return growth - 1
def annual_return(range_return, df, n):
    """Annualise an interval return.

    Args:
        range_return: Return over the whole interval.
        df: The observations covering the interval; only ``len(df)`` is used.
        n: Periods per year.

    Returns:
        ``(1 + range_return) ** (n / len(df)) - 1``.
    """
    growth = 1 + range_return
    exponent = n / len(df)
    return growth ** exponent - 1
def gain_loss_ratio(simple_return):
    """Compute the gain/loss ratio of a return series.

    Args:
        simple_return: Simple returns supporting boolean-mask indexing
            (e.g. a pandas Series).

    Returns:
        The sum of the non-negative returns divided by the sum of the
        negative returns, with the sign flipped.
    """
    gains = simple_return[simple_return >= 0].sum()
    losses = simple_return[simple_return < 0].sum()
    return -gains / losses
def alpha_beta(simple_return, b_simple_return, n):
    """Estimate alpha and beta by regressing returns on benchmark returns.

    A scikit-learn ``LinearRegression`` is fitted with the benchmark returns
    as the single feature. The last observation is left out of the fit.

    Args:
        simple_return: Simple returns of the fund.
        b_simple_return: Simple returns of the benchmark.
        n: Periods per year, used to scale the intercept.

    Returns:
        Tuple ``(alpha, beta)``: the fitted intercept multiplied by ``n``,
        and the fitted slope.
    """
    from sklearn.linear_model import LinearRegression

    frame = pd.DataFrame()
    sample_count = len(simple_return)
    frame['returns'] = simple_return
    frame['b_returns'] = b_simple_return

    # Every row except the last one takes part in the regression.
    usable = sample_count - 1
    features = np.array(frame[['b_returns']][:usable])
    targets = np.array(frame[['returns']][:usable])

    model = LinearRegression()
    model.fit(features, targets)
    beta = model.coef_[0][0]
    alpha = model.intercept_[0] * n
    return alpha, beta
def win_rate(simple_return, b_simple_return):
    """Compute the share of periods in which returns matched or beat a benchmark.

    Args:
        simple_return: Simple returns of the fund.
        b_simple_return: Simple returns of the benchmark.

    Returns:
        Count of periods where ``simple_return - b_simple_return >= 0``
        divided by the count of non-missing differences.
    """
    frame = pd.DataFrame()
    frame['diff'] = simple_return - b_simple_return
    wins = frame.loc[frame['diff'] >= 0, 'diff'].count()
    total = frame['diff'].count()
    return wins / total
def lpm(returns, threshold, order):
    """Compute the lower partial moment of a return series.

    Args:
        returns: Returns as a list, ndarray or pandas Series.
        threshold: Target (expected) return.
        order: Order of the moment, e.g. 1 or 2.

    Returns:
        The mean of ``max(threshold - r, 0) ** order`` over all returns.
    """
    # A plain array keeps the arithmetic identical for lists, arrays and
    # Series (Series.clip does not accept the ``min=`` keyword).
    values = np.asarray(returns, dtype=float)
    # Shortfall below the threshold; returns above it contribute nothing.
    shortfall = threshold - values
    shortfall[shortfall < 0] = 0
    return np.sum(shortfall ** order) / len(values)
def var(returns, alpha):
    """Compute historical-simulation value at risk.

    The returns are sorted in ascending order and the element at position
    ``int(alpha * len(returns))`` is taken; its absolute value is returned.

    Args:
        returns: Historical returns (non-empty).
        alpha: Quantile level, e.g. 0.05.

    Returns:
        The VaR as a non-negative number.
    """
    ordered = np.sort(returns)
    # Clamp to the last element so alpha == 1.0 does not index past the end.
    position = min(int(alpha * len(ordered)), len(ordered) - 1)
    # VaR is reported as a positive number.
    return abs(ordered[position])
def cvar(returns, alpha):
    """Compute historical-simulation conditional value at risk.

    The returns are sorted in ascending order and the lowest
    ``int(alpha * len(returns))`` of them are averaged; the absolute value of
    that average is returned.

    Args:
        returns: Historical returns (non-empty).
        alpha: Tail fraction, e.g. 0.05.

    Returns:
        The CVaR as a non-negative number.
    """
    ordered = np.sort(returns)
    # Always average at least the single worst return, so a small sample or a
    # small alpha does not divide by zero.
    tail_size = max(int(alpha * len(ordered)), 1)
    # CVaR is reported as a positive number.
    return abs(ordered[:tail_size].sum() / tail_size)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment