diff --git a/common/mysql_uitl.py b/common/mysql_uitl.py
index f55bd21eba947868ddac0e06273294385632e8da..0c2581214ee32cdfc50a21cf9fbe96989d0b4edf 100644
--- a/common/mysql_uitl.py
+++ b/common/mysql_uitl.py
@@ -7,6 +7,7 @@ import pymysql
 from config.config import Settings

 logging.basicConfig(format="%(asctime)s %(name)s:%(levelname)s:%(message)s", datefmt="%Y-%m-%d %H:%M:%S", level=logging.INFO)
+
 def get_connection():
     return pymysql.connect(
         host=Settings.MYSQL['host'],
@@ -58,7 +59,7 @@ def fetch_one_list(sql, args):
 def fetch_all_list(sql, args):
     conn = get_connection()
     cursor = conn.cursor()
-    cursor.execute(sql, args)
+    cursor.executemany(sql, args)
     data_list = cursor.fetchall()
     conn.commit()
     connect_close(cursor, conn)
@@ -131,10 +132,10 @@ def connect_close(cursor, conn):

 # Save results to MySQL
 def save_result(database, table, result_dict, file_name):
+    row = 0
     if result_dict:
         result_table = database + '.' + table
         logging.info(f'save {result_table} start')
-        row = 0
         fields = None
         place_holder = None
         values = list()
@@ -145,11 +146,49 @@ def save_result(database, table, result_dict, file_name):
             values.append(tuple(i.values()))
         sql = f'''replace into {result_table} ( {fields} ) values ( {place_holder} )'''
         rs = insert_batch(sql, values)
-        if rs == row or rs == 2 * row:  # because REPLACE is used here
+        if rs >= row:
             logging.info(f'save {result_table} success {row}')
         else:
             logging.error(f'save {result_table} error: data {row} rows, inserted {rs} rows, executed by: {file_name}')
+    return row
+
+def save_etl_log(database, table, data_dt, row, status, task_file, now_time):
+    sql = f'''replace into tamp_data_dwd.dwd_etl_log (data_dt,`database`,`table`,`rows`,`status`,task_file,run_time)
+              values('{data_dt}', '{database}', '{table}', {row}, '{status}', '{task_file}', '{now_time}')'''
+    insert(sql, None)
+
+
+
+# def upsert(database, table, result_dict, file_name):
+#     '''
+#     Update or insert: update the row if it exists, otherwise insert it
+#     :param database:
+#     :param table:
+#     :param result_dict:
+#     :param file_name:
+#     :return:
+#     '''
+#     if result_dict:
+#         result_table = database + '.' + table
+#         logging.info(f'upsert {result_table} start')
+#         row = 0
+#         fields = None
+#         place_holder = None
+#         values = list()
+#         for i in result_dict:
+#             row = row + 1
+#             fields = ','.join([f"`{k}`" for k in i.keys()])
+#             place_holder = ','.join(["%s" for _ in i.keys()])
+#             values.append(tuple(i.values()))
+#
+#
+#         sql = f'''replace into {result_table} ( {fields} ) values ( {place_holder} )'''
+#         rs = insert_batch(sql, values)
+#         if rs == row or rs == 2 * row:  # because REPLACE is used here
+#             logging.info(f'upsert {result_table} success {row}')
+#         else:
+#             logging.error(f'upsert {result_table} error: data {row} rows, inserted {rs} rows, executed by: {file_name}')


 def rows_to_dict_list(cursor):
diff --git a/common/test_mysql_uitl.py b/common/test_mysql_uitl.py
index 8098dfa70977a05c3bede77d7d97abedddbb4f25..90dd40ef79177231c6fe2717da148ee864253ae6 100644
--- a/common/test_mysql_uitl.py
+++ b/common/test_mysql_uitl.py
@@ -1,9 +1,11 @@
 # -*- coding: utf-8 -*-
 import logging
+import sys
+from sqlite3 import Row

 import pymysql
 from config.test_config import Settings
-
+logging.basicConfig(format="%(asctime)s %(name)s:%(levelname)s:%(message)s", datefmt="%Y-%m-%d %H:%M:%S", level=logging.INFO)

 def get_connection():
     return pymysql.connect(
@@ -17,21 +19,48 @@ def get_connection():
     )


-def fetch_one(sql):
+def fetch_one(sql, args):
+    conn = get_connection()
+    cursor = conn.cursor()
+    cursor.execute(sql, args)
+    column_names = [col[0] for col in cursor.description]
+    dict_res = [dict(zip(column_names, row)) for row in cursor.fetchall()]
+    conn.commit()
+    connect_close(cursor, conn)
+
+    return dict_res
+
+
+def fetch_all(sql, args):
     conn = get_connection()
     cursor = conn.cursor()
-    cursor.execute(sql)
+    cursor.execute(sql, args)
+    column_names = [col[0] for col in cursor.description]
+    dict_res = [dict(zip(column_names, row)) for row in cursor.fetchall()]
+    conn.commit()
+    connect_close(cursor, conn)
+
+    return dict_res
+
+
+
+def fetch_one_list(sql, args):
+    conn = get_connection()
+    cursor = conn.cursor()
+    cursor.execute(sql, args)
     data = cursor.fetchone()
+    conn.commit()
     connect_close(cursor, conn)
     return data


-def fetch_all(sql, args):
+def fetch_all_list(sql, args):
     conn = get_connection()
     cursor = conn.cursor()
     cursor.execute(sql, args)
     data_list = cursor.fetchall()
+    conn.commit()
     connect_close(cursor, conn)
     return data_list

@@ -42,9 +71,26 @@ def insert(sql, args):
     cursor = conn.cursor()
     row = cursor.execute(sql, args)
     conn.commit()
+    connect_close(cursor, conn)
     return row


+# def insert_batch(sql, args):
+#     conn = get_connection()
+#     cursor = conn.cursor()
+#     # row = cursor.executemany(sql, args)
+#     # conn.commit()
+#     # connect_close(cursor, conn)
+#     row = 0
+#     try:
+#         row = cursor.executemany(sql, args)
+#         logging.info(sql, args)
+#         conn.commit()
+#     except Exception as e:
+#         cursor.rollback()
+#         logging.error(e)
+#     connect_close(cursor, conn)
+#     return row


 def insert_batch(sql, args):
@@ -53,9 +99,11 @@ def insert_batch(sql, args):
     row = cursor.executemany(sql, args)
     conn.commit()
     connect_close(cursor, conn)
+
     return row


+
 def update(sql, args):
     conn = get_connection()
     cursor = conn.cursor()
@@ -70,18 +118,40 @@ def update(sql, args):
 def update_batch(sql, args):
     conn = get_connection()
     cursor = conn.cursor()
-    row = None
-    try:
-        row = cursor.executemany(sql, args)
-        conn.commit()
-    except Exception as e:
-        logging.error(e)
-        conn.rollback()
-    finally:
-        connect_close(cursor, conn)
+    row = cursor.executemany(sql, args)
+    conn.commit()
+    connect_close(cursor, conn)
     return row


 def connect_close(cursor, conn):
     cursor.close()
     conn.close()
+
+
+# Save results to MySQL
+def save_result(database, table, result_dict, file_name):
+    if result_dict:
+        result_table = database + '.' + table
+        logging.info(f'save {result_table} start')
+        row = 0
+        fields = None
+        place_holder = None
+        values = list()
+        for i in result_dict:
+            row = row + 1
+            fields = ','.join([f"`{k}`" for k in i.keys()])
+            place_holder = ','.join(["%s" for _ in i.keys()])
+            values.append(tuple(i.values()))
+        sql = f'''replace into {result_table} ( {fields} ) values ( {place_holder} )'''
+        rs = insert_batch(sql, values)
+        if rs == row or rs == 2 * row:  # because REPLACE is used here
+            logging.info(f'save {result_table} success {row}')
+        else:
+            logging.error(f'save {result_table} error: data {row} rows, inserted {rs} rows, executed by: {file_name}')
+
+
+
+def rows_to_dict_list(cursor):
+    columns = [i[0] for i in cursor.description]
+    return [dict(zip(columns, row)) for row in cursor]
\ No newline at end of file
diff --git a/common/time_util.py b/common/time_util.py
index 4aa5c2783d209ee368ee5c019f11f9aac6732fcd..c0a441be05106573e34ae9adf7ee07b3dc0599c0 100644
--- a/common/time_util.py
+++ b/common/time_util.py
@@ -74,6 +74,11 @@ def get_run_time(args):


 # def get_current_week():
+#     """
+#     Given a date, return 00:00 of Monday and 23:59:59 of Sunday of the week that date falls in
+#     :param date_str: e.g. "2020-05-01"
+#     :return: 00:00 of Monday and 23:59:59 of Sunday of the week the given date falls in
+#     """
 #     import datetime
 #     monday, sunday = datetime.date.today(), datetime.date.today()
 #     one_day = datetime.timedelta(days=1)
@@ -88,43 +93,43 @@ def get_run_time(args):
 #     }
 #     return week_dict

-# def get_current_week(data_dt):
-#     """
-#     Given a date, return 00:00 of Monday and 23:59:59 of Sunday of the week that date falls in
-#     :param date_str: e.g. "2020-05-01"
-#     :return: 00:00 of Monday and 23:59:59 of Sunday of the week the given date falls in
-#     """
-#     import datetime
-#     now_time = datetime.datetime.strptime(data_dt + " 00:00:00", "%Y-%m-%d %H:%M:%S")
-#     monday = now_time - datetime.timedelta(days=now_time.weekday(), hours=now_time.hour,
-#                                            minutes=now_time.minute, seconds=now_time.second,
-#                                            microseconds=now_time.microsecond)
-#     sunday = monday + datetime.timedelta(days=6, hours=23, minutes=59, seconds=59)
-#     week_dict = {
-#         'monday': monday,
-#         'sunday': sunday
-#     }
-#     return week_dict
-
-
-
 def get_current_week(data_dt):
     """
-    Given a date, return 00:00 of the previous Friday and 23:59:59 of this Thursday for that date
+    Given a date, return 00:00 of Monday and 23:59:59 of Sunday of the week that date falls in
     :param date_str: e.g. "2020-05-01"
-    :return: 00:00 of the previous Friday and 23:59:59 of this Thursday for the given date
+    :return: 00:00 of Monday and 23:59:59 of Sunday of the week the given date falls in
     """
     import datetime
     now_time = datetime.datetime.strptime(data_dt + " 00:00:00", "%Y-%m-%d %H:%M:%S")
-    last_friday = now_time - datetime.timedelta(days=(now_time.weekday() + 3) % 7, hours=now_time.hour,
+    monday = now_time - datetime.timedelta(days=now_time.weekday(), hours=now_time.hour,
                                            minutes=now_time.minute, seconds=now_time.second,
                                            microseconds=now_time.microsecond)
-    thursday = last_friday + datetime.timedelta(days=6, hours=23, minutes=59, seconds=59)
+    sunday = monday + datetime.timedelta(days=6, hours=23, minutes=59, seconds=59)
     week_dict = {
-        'last_friday': last_friday,
-        'thursday': thursday
+        'monday': monday,
+        'sunday': sunday
     }
     return week_dict
+#
+
+
+# def get_current_week(data_dt):
+#     """
+#     Given a date, return 00:00 of the previous Friday and 23:59:59 of this Thursday for that date
+#     :param date_str: e.g. "2020-05-01"
+#     :return: 00:00 of the previous Friday and 23:59:59 of this Thursday for the given date
+#     """
+#     import datetime
+#     now_time = datetime.datetime.strptime(data_dt + " 00:00:00", "%Y-%m-%d %H:%M:%S")
+#     last_friday = now_time - datetime.timedelta(days=(now_time.weekday() + 3) % 7, hours=now_time.hour,
+#
minutes=now_time.minute, seconds=now_time.second, +# microseconds=now_time.microsecond) +# thursday = last_friday + datetime.timedelta(days=6, hours=23, minutes=59, seconds=59) +# week_dict = { +# 'last_friday': last_friday, +# 'thursday': thursday +# } +# return week_dict # 获å–周一至周日的日期 diff --git a/edw/ads/basic/__init__.py b/edw/ads/basic/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/edw/ads/basic/ads_app_install.py b/edw/ads/basic/ads_app_install.py new file mode 100644 index 0000000000000000000000000000000000000000..c34af01df3f4852ee86601bf277cdefe918d8346 --- /dev/null +++ b/edw/ads/basic/ads_app_install.py @@ -0,0 +1,85 @@ +# -*- coding: utf-8 -*- + + +''' +app安装é‡,定时任务,æ¯å¤©è¿è¡Œä¸€æ¬¡ +''' +import json +import logging +import os +import sys +import time + +import requests + +from common.mysql_uitl import save_result, fetch_all, save_etl_log +from common.time_util import YMDHMS_FORMAT, now_str + +logging.basicConfig(format="%(asctime)s %(name)s:%(levelname)s:%(message)s", datefmt="%Y-%m-%d %H:%M:%S", level=logging.INFO) +file_name = sys.argv[0] +task_file = os.path.split(__file__)[-1].split(".")[0] + + +def ads_app_install(data_dt): + install_num_dict = query_app_install(data_dt) + row = save_result('tamp_data_ads', 'ads_app_install', install_num_dict, file_name) + now_time = now_str(YMDHMS_FORMAT) + save_etl_log('tamp_data_ads', 'ads_app_install', data_dt, row, 'done', task_file, now_time) + + +def query_app_install(data_dt): + data_dt = (datetime.datetime.strptime(str(data_dt), "%Y-%m-%d") - datetime.timedelta(days=1)).strftime("%Y-%m-%d") + function_name = sys._getframe().f_code.co_name + logging.info(f'{function_name} start') + sql = ''' + select p.data_dt + ,case when weekday(p.data_dt) + 1 = '1' then '星期一' + when weekday(p.data_dt) + 1 = '2' then '星期二' + when weekday(p.data_dt) + 1 = '3' then '星期三' + when weekday(p.data_dt) + 1 = '4' then '星期四' + when weekday(p.data_dt) + 1 = '5' then '星期五' + when weekday(p.data_dt) + 1 = '6' then '星期å…' + when weekday(p.data_dt) + 1 = '7' then '星期日' + else '计算错误' + end as weekday + ,sum(p.pc_num) as pc_num + ,sum(p.ios_num) as ios_num + ,sum(p.android_num) as android_num + ,sum(p.total_app_num) as total_app_num + from + ( + select data_dt + ,count(1) as pc_num + ,0 as ios_num + ,0 as android_num + ,0 as total_app_num + from tamp_data_dwd.dwd_user_login_pc_record + where data_dt = %s + and team_id <> '0' + group by data_dt + union all + select data_dt + ,0 as pc_num + ,ios_num + ,android_num + ,total_app_num + from tamp_data_dwd.dwd_app_install + where data_dt = %s + ) p + group by p.data_dt, weekday + ''' + install_num_dict = fetch_all(sql, (data_dt, data_dt)) + logging.info(f'{function_name} success') + return install_num_dict + + +if __name__ == '__main__': + import datetime + begin = datetime.date(2021, 9, 19) + end = datetime.date(2021, 9, 22) + data_dt = begin + delta = datetime.timedelta(days=1) + while data_dt <= end: + print(data_dt.strftime("%Y-%m-%d")) + ads_app_install(data_dt) + data_dt += delta \ No newline at end of file diff --git a/edw/ads/basic/ads_platform_active.py b/edw/ads/basic/ads_platform_active.py new file mode 100644 index 0000000000000000000000000000000000000000..d290a589b919018fdd3ff9d07733e72b39a35fa8 --- /dev/null +++ b/edw/ads/basic/ads_platform_active.py @@ -0,0 +1,141 @@ +# -*- coding: utf-8 -*- +import logging +import os +import sys + +from common.mysql_uitl import fetch_all, save_result, insert_batch, insert, 
save_etl_log +from common.time_util import get_current_week, month_start_and_end, now_str, YMDHMS_FORMAT + +''' +按平å°ç»Ÿè®¡ç”¨æˆ·æ—¥æ´»/周活/月活 +''' +logging.basicConfig(format="%(asctime)s %(name)s:%(levelname)s:%(message)s", datefmt="%Y-%m-%d %H:%M:%S", level=logging.INFO) +file_name = sys.argv[0] +task_file = os.path.split(__file__)[-1].split(".")[0] + + +def ads_platform_active(data_dt): + # 日活 + platform_dau_dict = query_platform_dau(data_dt) + row = save_result('tamp_data_ads', 'ads_platform_dau', platform_dau_dict, file_name) + now_time = now_str(YMDHMS_FORMAT) + save_etl_log('tamp_data_ads', 'ads_platform_dau', data_dt, row, 'done', task_file, now_time) + + # 周活 + platform_wau_dict = query_platform_wau(data_dt) + row = save_result('tamp_data_ads', 'ads_platform_wau', platform_wau_dict, file_name) + now_time = now_str(YMDHMS_FORMAT) + save_etl_log('tamp_data_ads', 'ads_platform_wau', data_dt, row, 'done', task_file, now_time) + + # 月活 + platform_mau_dict = query_platform_mau(data_dt) + row = save_result('tamp_data_ads', 'ads_platform_mau', platform_mau_dict, file_name) + now_time = now_str(YMDHMS_FORMAT) + save_etl_log('tamp_data_ads', 'ads_platform_mau', data_dt, row, 'done', task_file, now_time) + + +def query_platform_dau(data_dt): + ''' + 按平å°ç»Ÿè®¡ç”¨æˆ·æ—¥æ´» + :param data_dt: + :return: + ''' + function_name = sys._getframe().f_code.co_name + logging.info(f'{function_name} start') + sql = ''' + select data_dt + ,case when weekday(data_dt) + 1 = '1' then '星期一' + when weekday(data_dt) + 1 = '2' then '星期二' + when weekday(data_dt) + 1 = '3' then '星期三' + when weekday(data_dt) + 1 = '4' then '星期四' + when weekday(data_dt) + 1 = '5' then '星期五' + when weekday(data_dt) + 1 = '6' then '星期å…' + when weekday(data_dt) + 1 = '7' then '星期日' + else '计算错误' + end as weekday + ,count(distinct case when env = 'android' then user_id end) as android_dau + ,count(distinct case when env = 'ios' then user_id end) as ios_dau + ,count(distinct case when env in ('android', 'ios') then user_id end) as app_dau + ,count(distinct case when env = 'wechat' then user_id end) as wechat_dau + ,count(distinct case when env = 'xcx' then user_id end) as xcx_dau + ,count(distinct case when env = 'PCManager' then user_id end) as pc_dau + ,count(distinct user_id) as all_dau + from tamp_data_dwd.dwd_user_login_environment + where data_dt = %s + and team_id <> '0' + group by data_dt, weekday + ''' + platform_dau_dict = fetch_all(sql, data_dt) + logging.info(f'{function_name} success') + return platform_dau_dict + + +def query_platform_wau(data_dt): + ''' + 按平å°ç»Ÿè®¡ç”¨æˆ·å‘¨æ´» + :param data_dt: + :return: + ''' + function_name = sys._getframe().f_code.co_name + logging.info(f'{function_name} start') + current_week = get_current_week(data_dt) + start_date = str(current_week['monday'])[0: 10] + end_date = str(current_week['sunday'])[0: 10] + sql = ''' + select concat_ws('~', %s, %s) as data_dt + ,count(distinct case when env = 'android' then user_id end) as android_wau + ,count(distinct case when env = 'ios' then user_id end) as ios_wau + ,count(distinct case when env in ('android', 'ios') then user_id end) as app_wau + ,count(distinct case when env = 'wechat' then user_id end) as wechat_wau + ,count(distinct case when env = 'xcx' then user_id end) as xcx_wau + ,count(distinct case when env = 'PCManager' then user_id end) as pc_wau + ,count(distinct user_id) as all_wau + from tamp_data_dwd.dwd_user_login_environment + where data_dt between %s and %s + and team_id <> '0' + group by concat_ws('~', %s, %s) + ''' + 
platform_wau_dict = fetch_all(sql, (start_date, end_date, start_date, end_date, start_date, end_date)) + logging.info(f'{function_name} success') + return platform_wau_dict + + +def query_platform_mau(data_dt): + ''' + 按平å°ç»Ÿè®¡ç”¨æˆ·æœˆæ´» + :param data_dt: + :return: + ''' + function_name = sys._getframe().f_code.co_name + logging.info(f'{function_name} start') + month_dict = month_start_and_end(data_dt) + start_date = month_dict.get('start_date') + end_date = month_dict.get('end_date') + sql = ''' + select date_format(data_dt, '%%Y-%%m') as data_dt + ,count(distinct case when env = 'android' then user_id end) as android_mau + ,count(distinct case when env = 'ios' then user_id end) as ios_mau + ,count(distinct case when env in ('android', 'ios') then user_id end) as app_mau + ,count(distinct case when env = 'wechat' then user_id end) as wechat_mau + ,count(distinct case when env = 'xcx' then user_id end) as xcx_mau + ,count(distinct case when env = 'PCManager' then user_id end) as pc_mau + ,count(distinct user_id) as all_mau + from tamp_data_dwd.dwd_user_login_environment + where data_dt between %s and %s + group by date_format(data_dt, '%%Y-%%m') + ''' + platform_mau_dict = fetch_all(sql, (start_date, end_date)) + logging.info(f'{function_name} success') + return platform_mau_dict + + +if __name__ == '__main__': + import datetime + begin = datetime.date(2021, 1, 1) + end = datetime.date(2021, 9, 22) + data_dt = begin + delta = datetime.timedelta(days=1) + while data_dt <= end: + print(data_dt.strftime("%Y-%m-%d")) + ads_platform_active(str(data_dt)) + data_dt += delta diff --git a/edw/ads/basic/ads_user_level_active.py b/edw/ads/basic/ads_user_level_active.py new file mode 100644 index 0000000000000000000000000000000000000000..4c7079b26761aca9e6e7bfa6978d757ccaae1755 --- /dev/null +++ b/edw/ads/basic/ads_user_level_active.py @@ -0,0 +1,285 @@ +# -*- coding: utf-8 -*- +import logging +import os +import sys + +from common.mysql_uitl import fetch_all, save_result, insert_batch, insert, save_etl_log +from common.time_util import get_current_week, month_start_and_end, YMDHMS_FORMAT, now_str + +''' +按平用户身份统计日活/周活/月活 +''' +logging.basicConfig(format="%(asctime)s %(name)s:%(levelname)s:%(message)s", datefmt="%Y-%m-%d %H:%M:%S", level=logging.INFO) +file_name = sys.argv[0] +task_file = os.path.split(__file__)[-1].split(".")[0] + + +def ads_user_level_active(data_dt): + # 日活 + level_dau_dict = query_level_dau(data_dt) + row = save_result('tamp_data_ads', 'ads_user_level_dau', level_dau_dict, file_name) + now_time = now_str(YMDHMS_FORMAT) + save_etl_log('tamp_data_ads', 'ads_user_level_dau', data_dt, row, 'done', task_file, now_time) + + # 周活 + level_wau_dict = query_level_wau(data_dt) + row = save_result('tamp_data_ads', 'ads_user_level_wau', level_wau_dict, file_name) + now_time = now_str(YMDHMS_FORMAT) + save_etl_log('tamp_data_ads', 'ads_user_level_wau', data_dt, row, 'done', task_file, now_time) + + # 月活 + level_mau_dict = query_level_mau(data_dt) + row = save_result('tamp_data_ads', 'ads_user_level_mau', level_mau_dict, file_name) + now_time = now_str(YMDHMS_FORMAT) + save_etl_log('tamp_data_ads', 'ads_user_level_mau', data_dt, row, 'done', task_file, now_time) + + +def query_level_dau(data_dt): + ''' + 按用户身份统计用户日活 + :param data_dt: + :return: + ''' + function_name = sys._getframe().f_code.co_name + logging.info(f'{function_name} start') + sql = ''' + select p.data_dt + ,p.weekday + ,p.dau_0 + ,p.all_dau_1 + ,t.all_level1 + ,round(p.all_dau_1 / t.all_level1 * 100, 3) as all_level1_rate + 
,p.dau_1 + ,t.level1 + ,round(p.dau_1 / t.level1 * 100, 3) as level1_rate + ,p.dau_10 + ,t.level10 + ,case when t.level10 = 0 then 0 else round(p.dau_10 / t.level10 * 100, 3) end as level10_rate + ,p.all_dau_20 + ,t.all_level20 + ,case when t.all_level20 = 0 then 0 else round(p.all_dau_20 / t.all_level20 * 100, 3) end as all_level20_rate + ,p.dau_20 + ,t.level20 + ,case when t.level20 = 0 then 0 else round(p.dau_20 / t.level20 * 100, 3) end as level20_rate + ,p.dau_30 + ,t.level30 + ,case when t.level30 = 0 then 0 else round(p.dau_30 / t.level30 * 100, 3) end as level30_rate + ,p.dau_40 + ,t.level40 + ,case when t.level40 = 0 then 0 else round(p.dau_40 / t.level40 * 100, 3) end as level40_rate + ,p.dau_all + from + ( + select data_dt + ,case when weekday(data_dt) + 1 = '1' then '星期一' + when weekday(data_dt) + 1 = '2' then '星期二' + when weekday(data_dt) + 1 = '3' then '星期三' + when weekday(data_dt) + 1 = '4' then '星期四' + when weekday(data_dt) + 1 = '5' then '星期五' + when weekday(data_dt) + 1 = '6' then '星期å…' + when weekday(data_dt) + 1 = '7' then '星期日' + else '计算错误' + end as weekday + ,count(distinct(case when level_grade = 0 then user_id end)) as dau_0 + ,count(distinct(case when level_grade >= 1 then user_id end)) as all_dau_1 + ,count(distinct(case when level_grade = 1 then user_id end)) as dau_1 + ,count(distinct(case when level_grade = 10 then user_id end)) as dau_10 + ,count(distinct(case when level_grade >=20 then user_id end)) as all_dau_20 + ,count(distinct(case when level_grade = 20 then user_id end)) as dau_20 + ,count(distinct(case when level_grade = 30 then user_id end)) as dau_30 + ,count(distinct(case when level_grade = 40 then user_id end)) as dau_40 + ,count(distinct user_id) as dau_all + from tamp_data_dwd.dwd_user_login_environment + where data_dt = %s + group by data_dt + ) p + left join + ( + select %s as data_dt + ,sum(all_level1_num) as all_level1 + ,sum(level1_num) as level1 + ,sum(level10_num) as level10 + ,sum(all_level20_num) as all_level20 + ,sum(level20_num) as level20 + ,sum(level30_num) as level30 + ,sum(level40_num) as level40 + from tamp_analysis.new_user_growth_trend_view + where data_dt <= %s + ) t + on p.data_dt = t.data_dt + ''' + level_dau_dict = fetch_all(sql, data_dt) + logging.info(f'{function_name} success') + return level_dau_dict + + +def query_level_wau(data_dt): + ''' + 按用户身份统计用户周活 + :param data_dt: + :return: + ''' + function_name = sys._getframe().f_code.co_name + logging.info(f'{function_name} start') + current_week = get_current_week(data_dt) + start_date = str(current_week['monday'])[0: 10] + end_date = str(current_week['sunday'])[0: 10] + sql = ''' + select p.data_dt + ,p.wau_0 + ,p.all_wau_1 + ,t.all_level1 + ,round(p.all_wau_1 / t.all_level1 * 100, 3) as all_level1_rate + ,p.wau_1 + ,t.level1 + ,round(p.wau_1 / t.level1 * 100, 3) as level1_rate + ,p.wau_10 + ,t.level10 + ,case when t.level10 = 0 then 0 else round(p.wau_10 / t.level10 * 100 , 3) end as level10_rate + ,p.all_wau_20 + ,t.all_level20 + ,case when t.all_level20 = 0 then 0 else round(p.all_wau_20 / t.all_level20 * 100 , 3) end as all_level20_rate + ,p.wau_20 + ,t.level20 + ,case when t.level20 = 0 then 0 else round(p.wau_20 / t.level20 * 100 , 3) end as level20_rate + ,p.wau_30 + ,t.level30 + ,case when t.level30 = 0 then 0 else round(p.wau_30 / t.level30 * 100 , 3) end as level30_rate + ,p.wau_40 + ,t.level40 + ,case when t.level40 = 0 then 0 else round(p.wau_40 / t.level40 * 100 , 3) end as level40_rate + ,p.wau_all + from + ( + select concat(%s, '~', %s) as data_dt + ,%s as 
run_dt + ,count(distinct(case when o.level_grade = 0 then o.user_id end)) as wau_0 + ,count(distinct(case when o.level_grade >= 1 then o.user_id end)) as all_wau_1 + ,count(distinct(case when o.level_grade = 1 then o.user_id end)) as wau_1 + ,count(distinct(case when o.level_grade = 10 then o.user_id end)) as wau_10 + ,count(distinct(case when o.level_grade >=20 then o.user_id end)) as all_wau_20 + ,count(distinct(case when o.level_grade = 20 then o.user_id end)) as wau_20 + ,count(distinct(case when o.level_grade = 30 then o.user_id end)) as wau_30 + ,count(distinct(case when o.level_grade = 40 then o.user_id end)) as wau_40 + ,count(distinct o.user_id) as wau_all + from + ( + select max(data_dt) as data_dt + ,user_id + ,max(level_grade) as level_grade + from tamp_data_dwd.dwd_user_login_environment + where data_dt between %s and %s + group by user_id + ) o + group by concat(%s, '~', %s) + ) p + left join + ( + select %s as data_dt + ,sum(all_level1_num) as all_level1 + ,sum(level1_num) as level1 + ,sum(level10_num) as level10 + ,sum(all_level20_num) as all_level20 + ,sum(level20_num) as level20 + ,sum(level30_num) as level30 + ,sum(level40_num) as level40 + from tamp_analysis.new_user_growth_trend_view + where data_dt <= %s + ) t + on p.run_dt = t.data_dt + ''' + platform_wau_dict = fetch_all(sql, (start_date, end_date, start_date, end_date)) + logging.info(f'{function_name} success') + return platform_wau_dict + + +def query_level_mau(data_dt): + ''' + 按平å°ç»Ÿè®¡ç”¨æˆ·æœˆæ´» + :param data_dt: + :return: + ''' + function_name = sys._getframe().f_code.co_name + logging.info(f'{function_name} start') + month_dict = month_start_and_end(data_dt) + start_date = month_dict.get('start_date') + end_date = month_dict.get('end_date') + sql = ''' + select p.data_dt + ,p.mau_0 + ,p.all_mau_1 + ,t.all_level1 + ,round(p.all_mau_1 / t.all_level1 * 100, 3) as all_level1_rate + ,p.mau_1 + ,t.level1 + ,round(p.mau_1 / t.level1 * 100, 3) as level1_rate + ,p.mau_10 + ,t.level10 + ,case when t.level10 = 0 then 0 else round(p.mau_10 / t.level10 * 100, 3) end as level10_rate + ,p.all_mau_20 + ,t.all_level20 + ,case when t.all_level20 = 0 then 0 else round(p.all_mau_20 / t.all_level20 * 100, 3) end as all_level20_rate + ,p.mau_20 + ,t.level20 + ,case when t.level20 = 0 then 0 else round(p.mau_20 / t.level20 * 100, 3) end as level20_rate + ,p.mau_30 + ,t.level30 + ,case when t.level30 = 0 then 0 else round(p.mau_30 / t.level30 * 100, 3) end as level30_rate + ,p.mau_40 + ,t.level40 + ,case when t.level40 = 0 then 0 else round(p.mau_40 / t.level40 * 100, 3) end as level40_rate + ,p.mau_all + from + ( + select o.data_dt + ,%s as run_dt + ,count(distinct(case when o.level_grade = 0 then o.user_id end)) as mau_0 + ,count(distinct(case when o.level_grade >= 1 then o.user_id end)) as all_mau_1 + ,count(distinct(case when o.level_grade = 1 then o.user_id end)) as mau_1 + ,count(distinct(case when o.level_grade = 10 then o.user_id end)) as mau_10 + ,count(distinct(case when o.level_grade >= 20 then o.user_id end)) as all_mau_20 + ,count(distinct(case when o.level_grade = 20 then o.user_id end)) as mau_20 + ,count(distinct(case when o.level_grade = 30 then o.user_id end)) as mau_30 + ,count(distinct(case when o.level_grade = 40 then o.user_id end)) as mau_40 + ,count(distinct o.user_id) as mau_all + from + ( + select date_format(data_dt, '%%Y-%%m') as data_dt + ,user_id + ,max(level_grade) as level_grade + from tamp_analysis.user_login_area + where data_dt between %s and %s + group by date_format(data_dt, 
'%%Y-%%m'),user_id + ) o + group by o.data_dt + ) p + left join + ( + select %s as data_dt + ,sum(all_level1_num) as all_level1 + ,sum(level1_num) as level1 + ,sum(level10_num) as level10 + ,sum(all_level20_num) as all_level20 + ,sum(level20_num) as level20 + ,sum(level30_num) as level30 + ,sum(level40_num) as level40 + from tamp_analysis.new_user_growth_trend_view + where data_dt <= %s + ) t + on p.run_dt = t.data_dt + ''' + platform_mau_dict = fetch_all(sql, (start_date, end_date)) + logging.info(f'{function_name} success') + return platform_mau_dict + + +if __name__ == '__main__': + import datetime + begin = datetime.date(2021, 1, 1) + end = datetime.date(2021, 9, 17) + data_dt = begin + delta = datetime.timedelta(days=1) + while data_dt <= end: + print(data_dt.strftime("%Y-%m-%d")) + ads_user_level_active(str(data_dt)) + data_dt += delta diff --git a/edw/ads/user/ads_user_basic_behavior.py b/edw/ads/user/ads_user_basic_behavior.py index abf55c702cf6dd8dc4905ae960991b60b4286ea0..ba2eb9a71dca4755380a93279a8cac988592ea74 100644 --- a/edw/ads/user/ads_user_basic_behavior.py +++ b/edw/ads/user/ads_user_basic_behavior.py @@ -4,24 +4,28 @@ ''' 用户登录行为汇总,定时任务,æ¯2ä¸ªå°æ—¶è¿è¡Œä¸€æ¬¡ ''' +import os -from common.file_uitil import get_file_path, get_file_name -from common.mysql_uitl import fetch_all, insert_batch +from common.mysql_uitl import save_result, save_etl_log import logging import sys -from common.file_uitil import get_file_path, get_file_name -from common.mysql_uitl import fetch_all, insert_batch +from common.mysql_uitl import fetch_all, insert_batch +from common.time_util import now_str, YMDHMS_FORMAT logging.basicConfig(format="%(asctime)s %(name)s:%(levelname)s:%(message)s", datefmt="%Y-%m-%d %H:%M:%S", level=logging.INFO) file_name = sys.argv[0] +task_file = os.path.split(__file__)[-1].split(".")[0] -def ads_user_basic_behavior(): +def ads_user_basic_behavior(data_dt): user_basic_behavior_dict = query_ads_user_basic_behavior() - save_ads_user_basic_behavior(user_basic_behavior_dict) + # save_ads_user_basic_behavior(user_basic_behavior_dict) + row = save_result('tamp_data_ads', 'ads_user_basic_behavior', user_basic_behavior_dict, file_name) + now_time = now_str(YMDHMS_FORMAT) + save_etl_log('tamp_data_ads', 'ads_user_level_dau', data_dt, row, 'done', task_file, now_time) # è¿™é‡Œæœ‰ç‚¹ç»†èŠ‚ï¼Œæ²¡æœ‰ä¸¥æ ¼åŽ»åšã€‚ diff --git a/edw/ads/user/ads_user_browse_fund.py b/edw/ads/user/ads_user_browse_fund.py index f3e5e4d333654b6962e1477acd03fffc3e779e6d..fa2f0bf086f06907647b7830dd7e9daf87dd8b5d 100644 --- a/edw/ads/user/ads_user_browse_fund.py +++ b/edw/ads/user/ads_user_browse_fund.py @@ -1,8 +1,10 @@ # -*- coding: utf-8 -*- import logging +import os import sys -from common.mysql_uitl import fetch_all, save_result, insert_batch, insert +from common.mysql_uitl import fetch_all, save_result, insert_batch, insert, save_etl_log +from common.time_util import YMDHMS_FORMAT, now_str ''' 用户æµè§ˆäº§å“明细数æ®ç»Ÿè®¡å’Œæ±‡æ€»æ•°æ®ç»Ÿè®¡,定时任务,æ¯2ä¸ªå°æ—¶è¿è¡Œä¸€æ¬¡ @@ -14,13 +16,20 @@ p2060 探普产å“详情 ''' logging.basicConfig(format="%(asctime)s %(name)s:%(levelname)s:%(message)s", datefmt="%Y-%m-%d %H:%M:%S", level=logging.INFO) file_name = sys.argv[0] +task_file = os.path.split(__file__)[-1].split(".")[0] def ads_user_browse_fund(data_dt): browse_fund_details_dict = query_user_browse_fund_details(data_dt) + row = save_result('tamp_data_ads', 'ads_user_browse_fund_details', browse_fund_details_dict, file_name) + now_time = now_str(YMDHMS_FORMAT) + save_etl_log('tamp_data_ads', 
'ads_user_browse_fund_details', data_dt, row, 'done', task_file, now_time) + browse_fund_summary_dict = query_user_browse_fund_summary() - save_result('tamp_data_ads', 'ads_user_browse_fund_details', browse_fund_details_dict, file_name) - save_result('tamp_data_ads', 'ads_user_browse_fund_summary', browse_fund_summary_dict, file_name) + row = save_result('tamp_data_ads', 'ads_user_browse_fund_summary', browse_fund_summary_dict, file_name) + now_time = now_str(YMDHMS_FORMAT) + save_etl_log('tamp_data_ads', 'ads_user_browse_fund_summary', data_dt, row, 'done', task_file, now_time) + def query_user_browse_fund_details(data_dt): diff --git a/edw/ads/user/ads_user_content_order_records.py b/edw/ads/user/ads_user_content_order_records.py index 62fee23fe21d508b473f134946c4e003669fff99..ca8c702cf97e191a5ee858e75a5e3bb3d4eab8a1 100644 --- a/edw/ads/user/ads_user_content_order_records.py +++ b/edw/ads/user/ads_user_content_order_records.py @@ -6,16 +6,22 @@ 注æ„:在ads层需è¦éšè—æ‰‹æœºå· ''' import logging +import os import sys -from common.mysql_uitl import fetch_all, save_result +from common.mysql_uitl import fetch_all, save_result, save_etl_log +from common.time_util import now_str, YMDHMS_FORMAT logging.basicConfig(format="%(asctime)s %(name)s:%(levelname)s:%(message)s", datefmt="%Y-%m-%d %H:%M:%S", level=logging.INFO) -file_name = sys.argv[0] +file_name = sys.argv +file_name1 = os.path.basename(__file__) +task_file = os.path.split(__file__)[-1].split(".")[0] -def ads_user_content_order_records(): +def ads_user_content_order_records(data_dt): order_records_dict = query_user_content_order() - save_result('tamp_data_ads', 'ads_user_content_order_records', order_records_dict, file_name) + row = save_result('tamp_data_ads', 'ads_user_content_order_records', order_records_dict, file_name) + now_time = now_str(YMDHMS_FORMAT) + save_etl_log('tamp_data_ads', 'ads_user_content_order_records', data_dt, row, 'done', task_file, now_time) def query_user_content_order(): @@ -61,4 +67,4 @@ def query_user_content_order(): if __name__ == '__main__': - ads_user_content_order_records() \ No newline at end of file + ads_user_content_order_records() diff --git a/edw/ads/user/ads_user_learn_course.py b/edw/ads/user/ads_user_learn_course.py index 81d4a960afbb6b691ac149df9d9bf37e9c37dd49..973f6f355a8581db56cf8d53978f4b3afce8af1a 100644 --- a/edw/ads/user/ads_user_learn_course.py +++ b/edw/ads/user/ads_user_learn_course.py @@ -1,8 +1,10 @@ # -*- coding: utf-8 -*- import logging +import os import sys -from common.mysql_uitl import fetch_all, save_result, insert_batch, insert +from common.mysql_uitl import fetch_all, save_result, insert_batch, insert, save_etl_log +from common.time_util import now_str, YMDHMS_FORMAT ''' ç”¨æˆ·è§‚çœ‹ç›´æ’æ˜Žç»†æ•°æ®ç»Ÿè®¡,定时任务,æ¯2ä¸ªå°æ—¶è¿è¡Œä¸€æ¬¡ @@ -16,6 +18,7 @@ p10507 试å¬è¯¾ç¨‹é¡µé¢(课程包) ''' logging.basicConfig(format="%(asctime)s %(name)s:%(levelname)s:%(message)s", datefmt="%Y-%m-%d %H:%M:%S", level=logging.INFO) file_name = sys.argv[0] +task_file = os.path.split(__file__)[-1].split(".")[0] def ads_user_learn_course(data_dt): @@ -27,15 +30,19 @@ def ads_user_learn_course(data_dt): learn_course_details_dict = query_user_learn_course_details(data_dt) merge_learn_course(learn_course_details_dict, invite_people_record_result_dict) # ä¿å˜æ˜Žç´°æ•°æ® - save_result('tamp_data_ads', 'ads_user_learn_course_details', learn_course_details_dict, file_name) + row = save_result('tamp_data_ads', 'ads_user_learn_course_details', learn_course_details_dict, file_name) + now_time = now_str(YMDHMS_FORMAT) + 
save_etl_log('tamp_data_ads', 'ads_user_learn_course_details', data_dt, row, 'done', task_file, now_time) single_course_invite_summary_dict = query_single_course_invite_people_summary() course_invite_summary_dict = query_course_invite_people_summary() invite_people_summary_result_dict = merge_course_invite_people(single_course_invite_summary_dict, course_invite_summary_dict) learn_course_summary_dict = query_user_learn_course_summary() merge_learn_course(learn_course_summary_dict, invite_people_summary_result_dict) - # # ä¿å˜æ±‡æ€»æ•°æ® + # ä¿å˜æ±‡æ€»æ•°æ® save_result('tamp_data_ads', 'ads_user_learn_course_summary', learn_course_summary_dict, file_name) + now_time = now_str(YMDHMS_FORMAT) + save_etl_log('tamp_data_ads', 'ads_user_learn_course_summary', data_dt, row, 'done', task_file, now_time) # å•èŠ‚è¯¾å’Œè¯¾ç¨‹åŒ…é‚€è¯·äººæ•°ï¼Œæ²¡æœ‰ä¸¥æ ¼åŽ»é‡é‚€è¯·äººæ•°ï¼Œåªæ˜¯å¯¹å•节课程,邀请人数åšäº†åŽ»é‡ï¼Œè¯¾ç¨‹åŒ…邀请人数åšäº†åŽ»é‡ @@ -238,7 +245,7 @@ def query_user_learn_course_summary(): ,q.team_id ,p.course_id ,p.course_name - ,coalesce(q.course_type, 'å…è´¹') as course_type + ,coalesce(t.course_type, 'å…è´¹') as course_type ,p.learn_dur ,p.total_dur ,if(p.play_rate >=100.00, 100.00, p.play_rate) as play_rate @@ -249,13 +256,13 @@ def query_user_learn_course_summary(): ,user_id ,course_id ,course_name - ,total_dur + ,max(total_dur) as total_dur # åŽé¢å¯èƒ½æœ‰æ–°è¯¾ç¨‹å¢žåŠ ,sum(learn_dur) as learn_dur ,round(sum(learn_dur) / total_dur * 100, 2) as play_rate ,online_time ,sum(share_num) as share_num from tamp_data_dws.dws_user_learn_course - group by user_id,course_id,course_name,online_time,total_dur + group by user_id,course_id,course_name,online_time ) p left join ( @@ -283,9 +290,9 @@ def query_user_learn_course_summary(): on p.user_id = q.user_id order by p.user_id, p.data_dt desc, p.course_id ''' - watch_live_summary_dict = fetch_all(sql, None) + learn_course_summary_dict = fetch_all(sql, None) logging.info(f'{function_name} success') - return watch_live_summary_dict + return learn_course_summary_dict def merge_course_invite_people(dict1_list, dict2_list): @@ -353,8 +360,8 @@ def merge_learn_course(learn_course_dict, invite_dict): if __name__ == '__main__': import datetime - begin = datetime.date(2021, 4, 14) - end = datetime.date(2021, 9, 14) + begin = datetime.date(2021, 9, 15) + end = datetime.date(2021, 9, 16) data_dt = begin delta = datetime.timedelta(days=1) while data_dt <= end: diff --git a/edw/ads/user/ads_user_watch_live.py b/edw/ads/user/ads_user_watch_live.py index 52d79dafc922c5fe9ba3a2b7ac99c9d6b2912afd..f8f3b1a7cc9db062789440101a0251e848406b86 100644 --- a/edw/ads/user/ads_user_watch_live.py +++ b/edw/ads/user/ads_user_watch_live.py @@ -1,8 +1,10 @@ # -*- coding: utf-8 -*- import logging +import os import sys -from common.mysql_uitl import fetch_all, save_result, insert_batch, insert +from common.mysql_uitl import fetch_all, save_result, insert_batch, insert, save_etl_log +from common.time_util import YMDHMS_FORMAT, now_str ''' ç”¨æˆ·è§‚çœ‹ç›´æ’æ˜Žç»†æ•°æ®ç»Ÿè®¡,定时任务,æ¯2ä¸ªå°æ—¶è¿è¡Œä¸€æ¬¡ @@ -11,13 +13,19 @@ from common.mysql_uitl import fetch_all, save_result, insert_batch, insert ''' logging.basicConfig(format="%(asctime)s %(name)s:%(levelname)s:%(message)s", datefmt="%Y-%m-%d %H:%M:%S", level=logging.INFO) file_name = sys.argv[0] +task_file = os.path.split(__file__)[-1].split(".")[0] def ads_user_watch_live(data_dt): watch_live_details_dict = query_user_watch_live_details(data_dt) + row = save_result('tamp_data_ads', 'ads_user_watch_live_details', watch_live_details_dict, 
file_name) + now_time = now_str(YMDHMS_FORMAT) + save_etl_log('tamp_data_ads', 'ads_user_watch_live_details', data_dt, row, 'done', task_file, now_time) + watch_live_summary_dict = query_user_watch_live_summary() - save_result('tamp_data_ads', 'ads_user_watch_live_details', watch_live_details_dict, file_name) - save_result('tamp_data_ads', 'ads_user_watch_live_summary', watch_live_summary_dict, file_name) + row = save_result('tamp_data_ads', 'ads_user_watch_live_summary', watch_live_summary_dict, file_name) + now_time = now_str(YMDHMS_FORMAT) + save_etl_log('tamp_data_ads', 'ads_user_watch_live_summary', data_dt, row, 'done', task_file, now_time) def query_user_watch_live_details(data_dt): diff --git a/edw/ads/user/ads_user_watch_short_video.py b/edw/ads/user/ads_user_watch_short_video.py index 0a40f2c7835a6059fa4fcbcceaec9e03dcdc7646..28625133d4d210001a219e3dd7c71ea9d500af33 100644 --- a/edw/ads/user/ads_user_watch_short_video.py +++ b/edw/ads/user/ads_user_watch_short_video.py @@ -1,8 +1,10 @@ # -*- coding: utf-8 -*- import logging +import os import sys -from common.mysql_uitl import fetch_all, save_result, insert_batch, insert +from common.mysql_uitl import fetch_all, save_result, insert_batch, insert, save_etl_log +from common.time_util import YMDHMS_FORMAT, now_str ''' 用户观看çŸè§†é¢‘明细数æ®ç»Ÿè®¡,定时任务,æ¯2ä¸ªå°æ—¶è¿è¡Œä¸€æ¬¡ @@ -11,13 +13,19 @@ from common.mysql_uitl import fetch_all, save_result, insert_batch, insert ''' logging.basicConfig(format="%(asctime)s %(name)s:%(levelname)s:%(message)s", datefmt="%Y-%m-%d %H:%M:%S", level=logging.INFO) file_name = sys.argv[0] +task_file = os.path.split(__file__)[-1].split(".")[0] def ads_user_watch_short_video(data_dt): watch_short_video_details_dict = query_user_watch_short_video_details(data_dt) + row = save_result('tamp_data_ads', 'ads_user_watch_short_video_details', watch_short_video_details_dict, file_name) + now_time = now_str(YMDHMS_FORMAT) + save_etl_log('tamp_data_ads', 'ads_user_watch_short_video_details', data_dt, row, 'done', task_file, now_time) + watch_short_video_summary_dict = query_user_watch_short_video_summary() - save_result('tamp_data_ads', 'ads_user_watch_short_video_details', watch_short_video_details_dict, file_name) - save_result('tamp_data_ads', 'ads_user_watch_short_video_summary', watch_short_video_summary_dict, file_name) + row = save_result('tamp_data_ads', 'ads_user_watch_short_video_summary', watch_short_video_summary_dict, file_name) + now_time = now_str(YMDHMS_FORMAT) + save_etl_log('tamp_data_ads', 'ads_user_watch_short_video_summary', data_dt, row, 'done', task_file, now_time) def query_user_watch_short_video_details(data_dt): diff --git a/edw/dwd/basic/dwd_access_history_log.py b/edw/dwd/basic/dwd_access_history_log.py new file mode 100644 index 0000000000000000000000000000000000000000..4e2568a709aae1eebdfcdeb63a712a7518a9835d --- /dev/null +++ b/edw/dwd/basic/dwd_access_history_log.py @@ -0,0 +1,46 @@ +# -*- coding: utf-8 -*- + + +''' +access_log åŽ†å²æ•°æ®ä»Žæµ‹è¯•çŽ¯å¢ƒåŒæ¥è‡³ç”Ÿäº§çŽ¯å¢ƒã€‚ï¼ˆæ‰‹å·¥åŒæ¥ä¸€æ¬¡ï¼Œä¸åšå®šæ—¶ä»»åŠ¡ï¼‰ +''' + +import logging +import sys + + +from common.mysql_uitl import save_result +from common.test_mysql_uitl import fetch_all + +logging.basicConfig(format="%(asctime)s %(name)s:%(levelname)s:%(message)s", datefmt="%Y-%m-%d %H:%M:%S", level=logging.INFO) +file_name = sys.argv[0] + + + +def dwd_access_history_log(data_dt): + start_time = str(data_dt) + ' 00:00:00' + end_time = str(data_dt) + ' 23:59:59' + access_log_dict = query_access_history_log(start_time, end_time) + 
save_result('tamp_analysis', 'access_history_log', access_log_dict, file_name) + + +def query_access_history_log(start_time, end_time): + function_name = sys._getframe().f_code.co_name + logging.info(f'{function_name} start') + sql = '''select * from tamp_data_analysis.access_log where server_time between %s and %s''' + access_log_dict = fetch_all(sql, (start_time, end_time)) + logging.info(f'{function_name} success') + return access_log_dict + + +if __name__ == '__main__': + # dwd_app_install(data_dt) + import datetime + begin = datetime.date(2021, 1, 1) + end = datetime.date(2021, 4, 4) + data_dt = begin + delta = datetime.timedelta(days=1) + while data_dt <= end: + print(data_dt.strftime("%Y-%m-%d")) + dwd_access_history_log(data_dt) + data_dt += delta diff --git a/edw/dwd/basic/dwd_app_install.py b/edw/dwd/basic/dwd_app_install.py index bf150994197a6b5d13d5aa2178b6ceab0995a51a..7360cd2a061f72f007319450e325491271589799 100644 --- a/edw/dwd/basic/dwd_app_install.py +++ b/edw/dwd/basic/dwd_app_install.py @@ -2,19 +2,22 @@ ''' -,定时任务,æ¯å¤©è¿è¡Œä¸€æ¬¡ +app安装é‡,定时任务,æ¯å¤©è¿è¡Œä¸€æ¬¡ ''' import json import logging +import os import sys import time import requests -from common.mysql_uitl import save_result +from common.mysql_uitl import save_result, save_etl_log +from common.time_util import YMDHMS_FORMAT, now_str logging.basicConfig(format="%(asctime)s %(name)s:%(levelname)s:%(message)s", datefmt="%Y-%m-%d %H:%M:%S", level=logging.INFO) file_name = sys.argv[0] +task_file = os.path.split(__file__)[-1].split(".")[0] def dwd_app_install(data_dt): @@ -32,10 +35,14 @@ def dwd_app_install(data_dt): android_dict = get_app_install(android_url, 'android') ios_dict = get_app_install(ios_url, 'ios') ret_dict = dict(android_dict, **ios_dict) - ret_list = list() - ret_list.append(ret_dict) - if ret_list: - save_result('tamp_data_dwd', 'dwd_app_install', ret_list, file_name) + total_app_num = ret_dict['android_num'] + ret_dict['ios_num'] + ret_dict['total_app_num'] = total_app_num + if ret_dict: + ret_list = list() + ret_list.append(ret_dict) + row = save_result('tamp_data_dwd', 'dwd_app_install', ret_list, file_name) + now_time = now_str(YMDHMS_FORMAT) + save_etl_log('tamp_data_dwd', 'dwd_app_install', data_dt, row, 'done', task_file, now_time) def get_app_install(url, app_type): @@ -44,9 +51,7 @@ def get_app_install(url, app_type): response_ret = requests.get(url) content = response_ret.content.decode() content_dict = json.loads(content) - print(url) body = content_dict['body'] - print(body) install_dict = {} for i in body: data_dt = i['date'][0: 10] @@ -61,8 +66,8 @@ def get_app_install(url, app_type): if __name__ == '__main__': # dwd_app_install(data_dt) import datetime - begin = datetime.date(2020, 9, 15) - end = datetime.date(2021, 9, 15) + begin = datetime.date(2021, 9, 18) + end = datetime.date(2021, 9, 22) data_dt = begin delta = datetime.timedelta(days=1) while data_dt <= end: diff --git a/edw/dwd/user/dwd_user_browse_fund.py b/edw/dwd/user/dwd_user_browse_fund.py index 4bb8f009d58695fe2fab3f111edfe26ffb0f33e2..d845d4d71a5145c6d433afd22eaea18068554007 100644 --- a/edw/dwd/user/dwd_user_browse_fund.py +++ b/edw/dwd/user/dwd_user_browse_fund.py @@ -5,20 +5,24 @@ 用户æµè§ˆåŸºé‡‘详情,定时任务,æ¯2ä¸ªå°æ—¶è¿è¡Œä¸€æ¬¡ ''' import logging +import os import sys -from common.mysql_uitl import fetch_all, insert_batch - +from common.mysql_uitl import fetch_all, insert_batch, save_result, save_etl_log +from common.time_util import YMDHMS_FORMAT, now_str logging.basicConfig(format="%(asctime)s 
%(name)s:%(levelname)s:%(message)s", datefmt="%Y-%m-%d %H:%M:%S", level=logging.INFO) file_name = sys.argv[0] +task_file = os.path.split(__file__)[-1].split(".")[0] def dwd_user_browse_fund(data_dt): start_time = str(data_dt) + ' 00:00:00' end_time = str(data_dt) + ' 23:59:59' browse_fund_dict = query_user_browse_fund(start_time, end_time) - save_dwd_user_browse_fund(browse_fund_dict) - + # save_dwd_user_browse_fund(browse_fund_dict) + row = save_result('tamp_data_dwd', 'dwd_user_browse_fund', browse_fund_dict, file_name) + now_time = now_str(YMDHMS_FORMAT) + save_etl_log('tamp_data_dwd', 'dwd_user_browse_fund', data_dt, row, 'done', task_file, now_time) def query_user_browse_fund(start_time, end_time): diff --git a/edw/dwd/user/dwd_user_community_res.py b/edw/dwd/user/dwd_user_community_res.py new file mode 100644 index 0000000000000000000000000000000000000000..c76371493340a9bbaf7c4885d8fff0ee590b116a --- /dev/null +++ b/edw/dwd/user/dwd_user_community_res.py @@ -0,0 +1,327 @@ +# -*- coding: utf-8 -*- + +''' +ç”¨æˆ·æ·»åŠ å·¥ä½œå®¤è®°å½•,æ•°æ®é‡ä¸å¤§ï¼Œå…¨é‡æ›´æ–° +''' +import logging +import os +import sys + +from common.mysql_uitl import save_result, fetch_all, save_etl_log +from common.time_util import now_str, YMDHMS_FORMAT + + +logging.basicConfig(format="%(asctime)s %(name)s:%(levelname)s:%(message)s", datefmt="%Y-%m-%d %H:%M:%S", level=logging.INFO) +file_name = sys.argv[0] +task_file = os.path.split(__file__)[-1].split(".")[0] + + +def dwd_user_community_res(data_dt): + # ç›´æ’ + live_dict = query_community_live_res() + row1 = save_result('tamp_data_dwd', 'dwd_user_community_res', live_dict, file_name) + + # è€è¯¾ç¨‹ + old_course_dict = query_community_old_course_res() + row2 = save_result('tamp_data_dwd', 'dwd_user_community_res', old_course_dict, file_name) + + # çŸè§†é¢‘ + short_video_dict = query_community_short_video_res() + row3 = save_result('tamp_data_dwd', 'dwd_user_community_res', short_video_dict, file_name) + + # 公募 + public_fund_dict = query_community_public_fund_res() + row4 = save_result('tamp_data_dwd', 'dwd_user_community_res', public_fund_dict, file_name) + + # ç§å‹Ÿ + private_equity_dict = query_community_private_equity_res() + row5 = save_result('tamp_data_dwd', 'dwd_user_community_res', private_equity_dict, file_name) + + # 探普白åå• + tamp_fund_dict = query_community_tamp_fund_res() + row6 = save_result('tamp_data_dwd', 'dwd_user_community_res', tamp_fund_dict, file_name) + + # ç§æœ‰åŸºé‡‘ + private_fund_dict = query_community_private_fund_res() + row7 = save_result('tamp_data_dwd', 'dwd_user_community_res', private_fund_dict, file_name) + + # æ— å‡€å€¼äº§å“ + ifa_nonav_dict = query_community_ifa_nonav_res() + row8 = save_result('tamp_data_dwd', 'dwd_user_community_res', ifa_nonav_dict, file_name) + + # 图片 + picture_dict = query_community_picture_res() + row9 = save_result('tamp_data_dwd', 'dwd_user_community_res', picture_dict, file_name) + + # 课程 + course_dict = query_community_course_res() + row10 = save_result('tamp_data_dwd', 'dwd_user_community_res', course_dict, file_name) + + row = row1 + row2 + row3 + row4 + row5 + row6 + row7 + row8 + row9 + row10 + now_time = now_str(YMDHMS_FORMAT) + save_etl_log('tamp_data_dwd', 'dwd_user_community_res', data_dt, row, 'done', task_file, now_time) + + +def query_community_live_res(): + '''åœˆåæ·»åŠ ç›´æ’''' + function_name = sys._getframe().f_code.co_name + logging.info(f'{function_name} start') + sql = ''' + select p.id + ,p.news_feed_id + ,p.create_by as user_id + ,p.rel_id as res_id + ,t.zt_name as res_name + 
,'live' as res_type + ,p.create_time + ,p.update_time + ,p.delete_tag + ,p.org_id + from tamp_community.news_feed_res p + inner join tamp_zhibo.zhibo_theme t + on p.rel_id = t.id + where p.rel_type = 3 + ''' + live_dict = fetch_all(sql, None) + logging.info(f'{function_name} success') + return live_dict + + +def query_community_old_course_res(): + '''åœˆåæ·»åŠ è€è¯¾ç¨‹''' + function_name = sys._getframe().f_code.co_name + logging.info(f'{function_name} start') + sql = ''' + select p.id + ,p.news_feed_id + ,p.create_by as user_id + ,t.rel_id as res_id + ,q.title as res_name + ,'old_course' as res_type + ,p.create_time + ,p.update_time + ,p.delete_tag + ,p.org_id + from tamp_community.news_feed_res p + inner join tamp_user.curriculum_column_rel t + on p.rel_id = t.id + left join tamp_user.curriculum_res q + on t.rel_id = q.id + where p.rel_type in (4, 5) + ''' + old_course_dict = fetch_all(sql, None) + logging.info(f'{function_name} success') + return old_course_dict + + +def query_community_short_video_res(): + '''åœˆåæ·»åŠ çŸè§†é¢‘''' + function_name = sys._getframe().f_code.co_name + logging.info(f'{function_name} start') + sql = ''' + select p.id + ,p.news_feed_id + ,p.create_by as user_id + ,p.rel_id as res_id + ,coalesce(t.res_name, q.title) as res_name + ,'short_video' as res_type + ,p.create_time + ,p.update_time + ,p.delete_tag + ,p.org_id + from tamp_community.news_feed_res p + left join tamp_analysis.short_video_view t + on p.rel_id = t.res_id + left join tamp_user.curriculum_res q + on p.rel_id = q.id + where p.rel_type = 6 + ''' + short_video_dict = fetch_all(sql, None) + logging.info(f'{function_name} success') + return short_video_dict + + +def query_community_public_fund_res(): + '''åœˆåæ·»åŠ å…¬å‹ŸåŸºé‡‘''' + function_name = sys._getframe().f_code.co_name + logging.info(f'{function_name} start') + sql = ''' + select p.id + ,p.news_feed_id + ,p.create_by as user_id + ,p.rel_id as res_id + ,t.fund_name as res_name + ,'public_fund' as res_type + ,p.create_time + ,p.update_time + ,p.delete_tag + ,p.org_id + from tamp_community.news_feed_res p + inner join tamp_fund.tx_fund_info t + on p.rel_id = t.id + where p.rel_type = 88 + and p.product_type = 0 + ''' + public_fund_dict = fetch_all(sql, None) + logging.info(f'{function_name} success') + return public_fund_dict + + +def query_community_private_equity_res(): + '''åœˆåæ·»åŠ ç§å‹ŸåŸºé‡‘''' + function_name = sys._getframe().f_code.co_name + logging.info(f'{function_name} start') + sql = ''' + select p.id + ,p.news_feed_id + ,p.create_by as user_id + ,p.rel_id as res_id + ,t.fund_name as res_name + ,'private_equity' as res_type + ,p.create_time + ,p.update_time + ,p.delete_tag + ,p.org_id + from tamp_community.news_feed_res p + inner join tamp_fund.fund_info t + on p.rel_id = t.id + where p.rel_type = 88 + and p.product_type = 1 + ''' + private_equity_dict = fetch_all(sql, None) + logging.info(f'{function_name} success') + return private_equity_dict + + +def query_community_tamp_fund_res(): + '''æ·»åŠ ç™½åå•基金''' + function_name = sys._getframe().f_code.co_name + logging.info(f'{function_name} start') + sql = ''' + select p.id + ,p.news_feed_id + ,p.create_by as user_id + ,p.rel_id as res_id + ,t.fund_name as res_name + ,'tamp_fund' as res_type + ,p.create_time + ,p.update_time + ,p.delete_tag + ,p.org_id + from tamp_community.news_feed_res p + inner join tamp_product.fund_info t + on p.rel_id = t.id + where p.rel_type = 88 + and p.product_type = 2 + ''' + tamp_fund_dict = fetch_all(sql, None) + logging.info(f'{function_name} 
success') + return tamp_fund_dict + + +def query_community_private_fund_res(): + '''æ·»åŠ ç§æœ‰åŸºé‡‘''' + function_name = sys._getframe().f_code.co_name + logging.info(f'{function_name} start') + sql = ''' + select p.id + ,p.news_feed_id + ,p.create_by as user_id + ,p.rel_id as res_id + ,t.fund_name as res_name + ,'private_fund' as res_type + ,p.create_time + ,p.update_time + ,p.delete_tag + ,p.org_id + from tamp_community.news_feed_res p + inner join tamp_fund.ifa_imported_fund_info t + on p.rel_id = t.id + where p.rel_type = 88 + and p.product_type = 3 + ''' + private_fund_dict = fetch_all(sql, None) + logging.info(f'{function_name} success') + return private_fund_dict + + +def query_community_ifa_nonav_res(): + '''æ·»åŠ æ— å‡€å€¼äº§å“''' + function_name = sys._getframe().f_code.co_name + logging.info(f'{function_name} start') + sql = ''' + select p.id + ,p.news_feed_id + ,p.create_by as user_id + ,p.rel_id as res_id + ,t.fund_name as res_name + ,'ifa_nonav' as res_type + ,p.create_time + ,p.update_time + ,p.delete_tag + ,p.org_id + from tamp_community.news_feed_res p + inner join tamp_fund.ifa_imported_nonav_fund t + on p.rel_id = t.id + where p.rel_type = 88 + and p.product_type = 4 + ''' + ifa_nonav_dict = fetch_all(sql, None) + logging.info(f'{function_name} success') + return ifa_nonav_dict + + +def query_community_picture_res(): + '''æ·»åŠ å›¾ç‰‡''' + function_name = sys._getframe().f_code.co_name + logging.info(f'{function_name} start') + sql = ''' + select p.id + ,p.news_feed_id + ,p.create_by as user_id + ,p.rel_id as res_id + ,coalesce(t.original_name, '') as res_name + ,'picture' as res_type + ,p.create_time + ,p.update_time + ,p.delete_tag + ,p.org_id + from tamp_community.news_feed_res p + inner join tamp_user.cs_file_record t + on p.rel_id = t.guid + where p.rel_type = 122 + ''' + ifa_nonav_dict = fetch_all(sql, None) + logging.info(f'{function_name} success') + return ifa_nonav_dict + + +def query_community_course_res(): + '''æ·»åŠ è¯¾ç¨‹''' + function_name = sys._getframe().f_code.co_name + logging.info(f'{function_name} start') + sql = ''' + select p.id + ,p.news_feed_id + ,p.create_by as user_id + ,p.rel_id as res_id + ,t.title as res_name + ,'course' as res_type + ,t.package_id as course_id + ,t.main_title as course_name + ,p.create_time + ,p.update_time + ,p.delete_tag + ,p.org_id + from tamp_community.news_feed_res p + inner join tamp_analysis.course_res_view t + on p.rel_id = t.id + where p.rel_type in (303, 304) + ''' + course_dict = fetch_all(sql, None) + logging.info(f'{function_name} success') + return course_dict + + +if __name__ == '__main__': + dwd_user_community_res() \ No newline at end of file diff --git a/edw/dwd/user/dwd_user_content_order.py b/edw/dwd/user/dwd_user_content_order.py index a81d924b9e21bbabc2eed6e6b2a29225c8c42c80..8c6322a560e3514373ef9db95f2cec62b45dc3cd 100644 --- a/edw/dwd/user/dwd_user_content_order.py +++ b/edw/dwd/user/dwd_user_content_order.py @@ -7,17 +7,22 @@ import logging import os import sys -from common.file_uitil import get_file_path, get_file_name -from common.mysql_uitl import fetch_all, insert_batch, save_result +from common.mysql_uitl import fetch_all, save_result, save_etl_log +from common.time_util import now_str, YMDHMS_FORMAT + logging.basicConfig(format="%(asctime)s %(name)s:%(levelname)s:%(message)s", datefmt="%Y-%m-%d %H:%M:%S", level=logging.INFO) # file_path = get_file_path() file_name = sys.argv[0] +file_name = sys.argv[0] +task_file = os.path.split(__file__)[-1].split(".")[0] -def dwd_user_content_order(): 
+def dwd_user_content_order(data_dt): content_order_dict = query_dwd_user_content_order() - save_result('tamp_data_dwd', 'dwd_user_content_order', content_order_dict, file_name) + row = save_result('tamp_data_dwd', 'dwd_user_content_order', content_order_dict, file_name) + now_time = now_str(YMDHMS_FORMAT) + save_etl_log('tamp_data_dwd', 'dwd_user_content_order', data_dt, row, 'done', task_file, now_time) def query_dwd_user_content_order(): diff --git a/edw/dwd/user/dwd_user_first_login_client_time.py b/edw/dwd/user/dwd_user_first_login_client_time.py new file mode 100644 index 0000000000000000000000000000000000000000..4f354661e5ff05eea435ded3409d1ba901ee390f --- /dev/null +++ b/edw/dwd/user/dwd_user_first_login_client_time.py @@ -0,0 +1,197 @@ +# -*- coding: utf-8 -*- +''' +用户首次登录客户端时间,定时任务,æ¯å¤©è¿è¡Œä¸€æ¬¡ +''' +import logging +import sys +from common.mysql_uitl import fetch_all, insert_batch, save_result, update_batch + +logging.basicConfig(format="%(asctime)s %(name)s:%(levelname)s:%(message)s", datefmt="%Y-%m-%d %H:%M:%S", level=logging.ERROR) +file_name = sys.argv[0] + + +def dwd_user_first_login_client_time(data_dt): + start_time = str(data_dt) + ' 00:00:00' + end_time = str(data_dt) + ' 23:59:59' + login_time_dict = query_first_login_time(start_time, end_time) + save_user_login_time(login_time_dict) + # update_user_name() + + +def query_first_login_time(start_time, end_time): + function_name = sys._getframe().f_code.co_name + logging.info(f'{function_name} start') + sql = ''' + select `uid` as user_id + ,min(if(env = 'ios',server_time,null)) as login_ios_time + ,min(if(env = 'android',server_time, null)) as login_android_time + ,min(if(env = 'PCManager',server_time, null)) as login_pc_time + ,min(if(env = 'wechat',server_time, null)) as login_wechat_time + ,min(if(env = 'xcx',server_time, null)) as login_xcx_time + # from tamp_analysis.access_log + from tamp_analysis.access_history_log + where event_type = '1002' + and server_time between %s and %s + and env <> '' + and env is not null + and `uid` <> '' + and `uid` is not null + group by `uid` + ''' + exist_users_dict = fetch_all(sql, (start_time, end_time)) + logging.info(f'{function_name} success') + return exist_users_dict + + +def save_user_login_time(login_time_dict): + if login_time_dict: + function_name = sys._getframe().f_code.co_name + logging.info(f'{function_name} start') + database = 'tamp_data_dwd' + table = 'dwd_user_first_login_client_time' + result_table = database + '.' 
+ table + field_list = ['login_wechat_time', 'login_ios_time', 'login_android_time', 'login_pc_time', 'login_xcx_time'] + for field in field_list: + ios_dict_list = list() + android_dict_list = list() + pc_dict_list = list() + wechat_dict_list = list() + xcx_dict_list = list() + for i in login_time_dict: + keys = list(i.keys()) + for key in keys: + if key == 'login_wechat_time': + wechat_dict_list.append(dict(user_id=i['user_id'], login_wechat_time=i[field])) + elif key == 'login_ios_time': + ios_dict_list.append(dict(user_id=i['user_id'], login_ios_time=i[field])) + elif key == 'login_android_time': + android_dict_list.append(dict(user_id=i['user_id'], login_android_time=i[field])) + elif key == 'login_pc_time': + pc_dict_list.append(dict(user_id=i['user_id'], login_pc_time=i[field])) + elif key == 'login_xcx_time': + xcx_dict_list.append(dict(user_id=i['user_id'], login_xcx_time=i[field])) + else: + pass + + ''' åº”è¯¥åˆ†ä¸‰ç§æƒ…况,1.å˜åœ¨è¿™ä¸ªç”¨æˆ·ï¼Œå´æ²¡æœ‰ç™»å½•过这个客户端,2.å˜åœ¨è¿™ä¸ªç”¨æˆ·ï¼Œç™»å½•过这个客户端,3.没有å˜åœ¨è¿™ä¸ªç”¨æˆ·''' + # 结果表ä¸ï¼Œå˜åœ¨è¿™ä¸ªç”¨æˆ·ï¼Œä½†ä¹‹å‰æ²’有登录这个客户端的,则需è¦ç»™è¿™ä¸ªå®¢æˆ·ç«¯å¢žåŠ ç™»å½•æ—¶é—´ + sql1 = f'''select {field}, user_id from {result_table} + where user_id in ({','.join(["'%s'" % item['user_id'] for item in login_time_dict])}) + and {field} is null ''' + exist_users_not_login_dict = fetch_all(sql1, None) + exist_users_not_login_list = list() + if field == 'login_wechat_time': + exist_users_not_login_wechat_dict = [x for x in wechat_dict_list if x in exist_users_not_login_dict] + for i in exist_users_not_login_wechat_dict: + if i['login_wechat_time']: + exist_users_not_login_list.append([i[field], i['user_id']]) + elif field == 'login_ios_time': + exist_users_not_login_ios_dict = [x for x in ios_dict_list if x in exist_users_not_login_dict] + for i in exist_users_not_login_ios_dict: + if i['login_ios_time']: + exist_users_not_login_list.append([i[field], i['user_id']]) + elif field == 'login_android_time': + exist_users_not_login_android_dict = [x for x in android_dict_list if x in exist_users_not_login_dict] + for i in exist_users_not_login_android_dict: + if i['login_android_time']: + exist_users_not_login_list.append([i[field], i['user_id']]) + elif field == 'login_pc_time': + exist_users_not_login_pc_dict = [x for x in pc_dict_list if x in exist_users_not_login_dict] + for i in exist_users_not_login_pc_dict: + if i['login_pc_time']: + exist_users_not_login_list.append([i[field], i['user_id']]) + elif field == 'login_xcx_time': + exist_users_not_login_xcx_dict = [x for x in xcx_dict_list if x in exist_users_not_login_dict] + for i in exist_users_not_login_xcx_dict: + if i['login_xcx_time']: + exist_users_not_login_list.append([i[field], i['user_id']]) + + update = f'''update {result_table} set {field} = (%s) where user_id = (%s)''' + update_batch(update, exist_users_not_login_list) + # 结果表ä¸ï¼Œå˜åœ¨è¿™ä¸ªç”¨æˆ· + sql2 = f'''select {field}, user_id from {result_table} + where user_id in ({','.join(["'%s'" % item['user_id'] for item in login_time_dict])})''' + exist_users_dict = fetch_all(sql2, None) + not_exist_users_dict = list() + if field == 'login_wechat_time': + wechat_tmp = list() + for i in wechat_dict_list: + if i['login_wechat_time']: + wechat_tmp.append(i) + not_exist_users_dict = [x for x in wechat_tmp if x not in exist_users_dict] + elif field == 'login_ios_time': + ios_tmp = list() + for i in ios_dict_list: + print(i) + if i['login_ios_time']: + print(i['login_ios_time']) + ios_tmp.append(i) + not_exist_users_dict = [x for x in 
ios_tmp if x not in exist_users_dict] + elif field == 'login_android_time': + android_tmp = list() + for i in android_dict_list: + if i['login_android_time']: + android_tmp.append(i) + not_exist_users_dict = [x for x in android_tmp if x not in exist_users_dict] + elif field == 'login_pc_time': + pc_tmp = list() + for i in pc_dict_list: + if i['login_pc_time']: + pc_tmp.append(i) + not_exist_users_dict = [x for x in pc_tmp if x not in exist_users_dict] + elif field == 'login_xcx_time': + xcx_tmp = list() + for i in xcx_dict_list: + if i['login_xcx_time']: + xcx_tmp.append(i) + not_exist_users_dict = [x for x in xcx_tmp if x not in exist_users_dict] + save_result(database, table, not_exist_users_dict, file_name) + logging.info(f'{function_name} success') + + + # # print(result_list) + # # print(client_dict_list) + # # print(exist_users_not_login_dict) + # + # + # update = f'''update {result_table} set {field} = (%s) where user_id = (%s)''' + # + # # 在登录端å£ï¼Œå·²ç»æœ‰è¿‡ç™»å½•è¡Œè®°å½•çš„ï¼Œåˆ™ä¸æ›´æ–°æ—¶é—´ + # sql1 = f'''select user_id, {field} from {result_table} + # where user_id in ({','.join(["'%s'" % item['user_id'] for item in login_time_dict])})''' + # exist_users_dict = fetch_all(sql1, None) + # + # # print(update) + # # å·²ç»å˜åœ¨çš„用户,则更新å¦å¤–一个客户端登录的时间 + # (update, result_list) + # # ä¸å˜åœ¨çš„用户,则æ’å…¥æ–°æ•°æ® + # not_exist_users_dict = [x for x in client_dict_list if x not in exist_users_dict] + # save_result(database, table, not_exist_users_dict, file_name) + # logging.info(f'{function_name} success') + +def update_user_name(): + function_name = sys._getframe().f_code.co_name + logging.info(f'{function_name} start') + sql = ''' + update tamp_data_dwd.dwd_user_first_login_client_time p + left join tamp_analysis.user_info_view t + on p.user_id = t.user_id + set p.real_name = t.real_name, + p.user_name = t.user_name, + p.nickname = t.nickname, + p.team_id = t.team_id + ''' + fetch_all(sql, None) + logging.info(f'{function_name} success') + + +if __name__ == '__main__': + import datetime + begin = datetime.date(2021, 1, 1) + end = datetime.date(2021, 3, 31) + data_dt = begin + delta = datetime.timedelta(days=1) + while data_dt <= end: + print(data_dt.strftime("%Y-%m-%d")) + dwd_user_first_login_client_time(data_dt) + data_dt += delta diff --git a/edw/dwd/user/dwd_user_first_login_client_time_bak.py b/edw/dwd/user/dwd_user_first_login_client_time_bak.py new file mode 100644 index 0000000000000000000000000000000000000000..d6fe65048d0e440d58815fd89b588ded0b0a7e97 --- /dev/null +++ b/edw/dwd/user/dwd_user_first_login_client_time_bak.py @@ -0,0 +1,106 @@ +# -*- coding: utf-8 -*- +''' +用户首次登录客户端时间,定时任务,æ¯å¤©è¿è¡Œä¸€æ¬¡ +''' +import logging +import sys +from common.mysql_uitl import fetch_all, insert_batch, save_result + +logging.basicConfig(format="%(asctime)s %(name)s:%(levelname)s:%(message)s", datefmt="%Y-%m-%d %H:%M:%S", level=logging.ERROR) +file_name = sys.argv[0] + + +def dwd_user_first_login_client_time(data_dt): + start_time = str(data_dt) + ' 00:00:00' + end_time = str(data_dt) + ' 23:59:59' + database = 'tamp_data_dwd' + table = 'dwd_user_first_login_client_time' + # filed_list = ['wechat', 'ios', 'android', 'pc', 'xcx'] + filed_list = ['wechat', 'ios', 'android'] + for i in filed_list: + filed = 'login_' + i + '_time' + if i == 'pc': + exist_login_dict = query_exist_users(filed) + new_login_users_dict = query_new_users(start_time, end_time, filed, 'PCManager', exist_login_dict) + upsert(database, table, new_login_users_dict, filed) + else: + exist_login_dict = 
query_exist_users(filed) + new_login_users_dict = query_new_users(start_time, end_time, filed, i, exist_login_dict) + upsert(database, table, new_login_users_dict, filed) + + +def query_exist_users(field): + function_name = sys._getframe().f_code.co_name + logging.info(f'{function_name} start') + sql = f'select user_id from tamp_data_dwd.dwd_user_first_login_client_time where {field} is not null' + exist_users_dict = fetch_all(sql, None) + logging.info(f'{function_name} success') + return exist_users_dict + + +def query_new_users(start_time, end_time, field, param, exist_users_dict): + function_name = sys._getframe().f_code.co_name + logging.info(f'{function_name} start') + print(field) + sql = f''' + select `uid` as user_id + ,min(server_time) as {field} + from tamp_analysis.access_history_log + where server_time between %s and %s + and env = %s + and event_type = '1002' + and `uid` <> '' + and `uid` not in ({','.join(["'%s'" % item['user_id'] for item in exist_users_dict])}) + group by `uid` + ''' + print(sql, (start_time, end_time, param)) + new_users_dict = fetch_all(sql, (start_time, end_time, param)) + logging.info(f'{function_name} success') + return new_users_dict + + +def upsert(database, table, result_dict, field): + ''' + 更新或æ’入,å˜åœ¨åˆ™æ›´æ–°ï¼Œä¸å˜åœ¨ï¼Œåˆ™æ’å…¥ + :param database: + :param table: + :param result_dict: + :param field: + :return: + ''' + if result_dict: + function_name = sys._getframe().f_code.co_name + logging.info(f'{function_name} start') + result_table = database + '.' + table + logging.info(f'upsert {result_table} start') + + result_list = list() + for i in result_dict: + result_list.append([i[field], i['user_id']]) + sql = f'''select user_id from {result_table} + where user_id in ({','.join(["'%s'" % item['user_id'] for item in result_dict])}) + and {field} is not null ''' + exist_users_dict = fetch_all(sql, None) + update = f'''update {result_table} set {field} = (%s) where user_id = (%s)''' + print(update) + # å·²ç»å˜åœ¨çš„用户,则更新å¦å¤–一个客户端登录的时间 + insert_batch(update, result_list) + # ä¸å˜åœ¨çš„用户,则æ’å…¥æ–°æ•°æ® + not_exist_users_dict = [x for x in result_dict if x not in exist_users_dict] + print(not_exist_users_dict) + save_result(database, table, not_exist_users_dict, file_name) + + logging.info(f'{function_name} success') + + + +if __name__ == '__main__': + import datetime + begin = datetime.date(2021, 1, 1) + end = datetime.date(2021, 1, 17) + data_dt = begin + delta = datetime.timedelta(days=1) + while data_dt <= end: + print(data_dt.strftime("%Y-%m-%d")) + dwd_user_first_login_client_time(data_dt) + data_dt += delta diff --git a/edw/dwd/user/dwd_user_learn_course.py b/edw/dwd/user/dwd_user_learn_course.py index 1ebc53bfbe3b63bb8b06e29df985defb733f1fcb..560a4a27662364e3a025121f0cbb4d6af72ea997 100644 --- a/edw/dwd/user/dwd_user_learn_course.py +++ b/edw/dwd/user/dwd_user_learn_course.py @@ -5,23 +5,26 @@ 用户å¦ä¹ 课程明细统计,定时任务,æ¯2ä¸ªå°æ—¶è¿è¡Œä¸€æ¬¡ ''' import logging +import os import sys from common.file_uitil import get_file_path, get_file_name -from common.mysql_uitl import fetch_all, insert_batch - +from common.mysql_uitl import fetch_all, insert_batch, save_etl_log, save_result +from common.time_util import YMDHMS_FORMAT, now_str logging.basicConfig(format="%(asctime)s %(name)s:%(levelname)s:%(message)s", datefmt="%Y-%m-%d %H:%M:%S", level=logging.INFO) # file_path = get_file_path() file_name = sys.argv[0] +task_file = os.path.split(__file__)[-1].split(".")[0] def dwd_user_learn_course(data_dt): start_time = str(data_dt) + ' 00:00:00' end_time = 
str(data_dt) + ' 23:59:59' learn_course_dict = query_dwd_user_learn_course(start_time, end_time) - save_dwd_user_learn_course(learn_course_dict) - # save_ads_user_learn_course(learn_course_dict) # 明细数æ®åŒå†™ + row = save_result('tamp_data_dwd', 'dwd_user_learn_course', learn_course_dict, file_name) + now_time = now_str(YMDHMS_FORMAT) + save_etl_log('tamp_data_dwd', 'dwd_user_learn_course', data_dt, row, 'done', task_file, now_time) def query_dwd_user_learn_course(start_time, end_time): diff --git a/edw/dwd/user/dwd_user_login_area.py b/edw/dwd/user/dwd_user_login_area.py index eb767ca8ecc7a0581d28fd7dfbf4c28c3941c943..2af2b2027d470e916895c455c45ed40b52b8e601 100644 --- a/edw/dwd/user/dwd_user_login_area.py +++ b/edw/dwd/user/dwd_user_login_area.py @@ -5,23 +5,27 @@ 用户登录地区,定时任务,æ¯2ä¸ªå°æ—¶è¿è¡Œä¸€æ¬¡ ''' import logging +import os import sys from common.file_uitil import get_file_path, get_file_name -from common.mysql_uitl import fetch_all, insert_batch +from common.mysql_uitl import fetch_all, insert_batch, save_result, save_etl_log +from common.time_util import YMDHMS_FORMAT, now_str logging.basicConfig(format="%(asctime)s %(name)s:%(levelname)s:%(message)s", datefmt="%Y-%m-%d %H:%M:%S", level=logging.INFO) # file_path = get_file_path() # file_name = get_file_name() file_name = sys.argv[0] +task_file = os.path.split(__file__)[-1].split(".")[0] def dwd_user_login_area(data_dt): start_time = str(data_dt) + ' 00:00:00' end_time = str(data_dt) + ' 23:59:59' login_area_dict = query_user_login_area(start_time, end_time) - save_dwd_user_login_area(login_area_dict) - + row = save_result('tamp_data_dwd', 'save_dwd_user_login_area', login_area_dict, file_name) + now_time = now_str(YMDHMS_FORMAT) + save_etl_log('tamp_data_dwd', 'save_dwd_user_login_area', data_dt, row, 'done', task_file, now_time) def query_user_login_area(start_time, end_time): @@ -90,8 +94,8 @@ if __name__ == '__main__': # dwd_user_login_area() # dwd_user_login_environment(data_dt) import datetime - begin = datetime.date(2021, 4, 1) - end = datetime.date(2021, 9, 14) + begin = datetime.date(2021, 9, 17) + end = datetime.date(2021, 9, 17) data_dt = begin delta = datetime.timedelta(days=1) while data_dt <= end: diff --git a/edw/dwd/user/dwd_user_login_environment.py b/edw/dwd/user/dwd_user_login_environment.py index 4e0e8494e2b27df4078ce06e786d13ab93f22c78..339c52230e70cbf0ffba94c5021322b2227bf1e3 100644 --- a/edw/dwd/user/dwd_user_login_environment.py +++ b/edw/dwd/user/dwd_user_login_environment.py @@ -5,22 +5,27 @@ 用户登录环境,定时任务,æ¯2ä¸ªå°æ—¶è¿è¡Œä¸€æ¬¡ ''' import logging +import os import sys from common.file_uitil import get_file_path, get_file_name -from common.mysql_uitl import fetch_all, insert_batch +from common.mysql_uitl import fetch_all, insert_batch, save_result, save_etl_log +from common.time_util import YMDHMS_FORMAT, now_str logging.basicConfig(format="%(asctime)s %(name)s:%(levelname)s:%(message)s", datefmt="%Y-%m-%d %H:%M:%S", level=logging.INFO) # file_path = get_file_path() # file_name = get_file_name() file_name = sys.argv[0] +task_file = os.path.split(__file__)[-1].split(".")[0] def dwd_user_login_environment(data_dt): start_time = str(data_dt) + ' 00:00:00' end_time = str(data_dt) + ' 23:59:59' login_env_dict = query_user_login_env(start_time, end_time) - save_dwd_user_login_env(login_env_dict) + row = save_result('tamp_data_dwd', 'dwd_user_login_environment', login_env_dict, file_name) + now_time = now_str(YMDHMS_FORMAT) + save_etl_log('tamp_data_dwd', 'dwd_user_login_environment', data_dt, row, 'done', task_file, 
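# Illustrative sketch (not part of this patch): the three lines repeated across these
# tasks -- save_result() returning a row count, now_str(YMDHMS_FORMAT) for the run
# time, then save_etl_log() writing the audit row with status 'done' -- could be
# wrapped once. This assumes the helpers behave exactly as they are called elsewhere
# in this change; `save_and_log` itself is a hypothetical name.
from common.mysql_uitl import save_result, save_etl_log
from common.time_util import now_str, YMDHMS_FORMAT


def save_and_log(database, table, result_dict, data_dt, task_file, file_name):
    # Persist the batch, then record how many rows were written for this data_dt.
    row = save_result(database, table, result_dict, file_name)
    save_etl_log(database, table, data_dt, row, 'done', task_file, now_str(YMDHMS_FORMAT))
    return row


# e.g. save_and_log('tamp_data_dwd', 'dwd_user_learn_course', learn_course_dict,
#                   data_dt, task_file, file_name)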
now_time) # 查询用户登录环境 @@ -92,8 +97,8 @@ def save_dwd_user_login_env(ret): if __name__ == '__main__': # dwd_user_login_environment(data_dt) import datetime - begin = datetime.date(2021, 4, 1) - end = datetime.date(2021, 9, 14) + begin = datetime.date(2021, 9, 15) + end = datetime.date(2021, 9, 16) data_dt = begin delta = datetime.timedelta(days=1) while data_dt <= end: diff --git a/edw/dwd/user/dwd_user_login_pc_record.py b/edw/dwd/user/dwd_user_login_pc_record.py new file mode 100644 index 0000000000000000000000000000000000000000..962a8b92ad287399e81ef0fc0ef71f9cdfcb5a35 --- /dev/null +++ b/edw/dwd/user/dwd_user_login_pc_record.py @@ -0,0 +1,75 @@ +# -*- coding: utf-8 -*- +''' +用户首次登录pc的记录,定时任务,æ¯å¤©è¿è¡Œä¸€æ¬¡ +åŽé¢åºŸå¼ƒ +''' +import logging +import os +import sys +from common.mysql_uitl import fetch_all, insert_batch, save_result, save_etl_log +from common.time_util import now_str, YMDHMS_FORMAT + +logging.basicConfig(format="%(asctime)s %(name)s:%(levelname)s:%(message)s", datefmt="%Y-%m-%d %H:%M:%S", level=logging.INFO) +file_name = sys.argv[0] +task_file = os.path.split(__file__)[-1].split(".")[0] + + +def dwd_user_login_pc_record(data_dt): + start_time = str(data_dt) + ' 00:00:00' + end_time = str(data_dt) + ' 23:59:59' + exist_users_dict = query_exist_users() + new_users_dict = query_new_pc_users(start_time, end_time, exist_users_dict) + row = save_result('tamp_data_dwd', 'dwd_user_login_pc_record', new_users_dict, file_name) + now_time = now_str(YMDHMS_FORMAT) + save_etl_log('tamp_data_dwd', 'dwd_user_login_pc_record', data_dt, row, 'done', task_file, now_time) + + +# 查询已ç»å˜åœ¨çš„pc用户 +def query_exist_users(): + function_name = sys._getframe().f_code.co_name + logging.info(f'{function_name} start') + sql = 'select user_id from tamp_data_dwd.dwd_user_login_pc_record' + exist_users_dict = fetch_all(sql, None) + logging.info(f'{function_name} success') + return exist_users_dict + + +def query_new_pc_users(start_time, end_time, exist_users_dict): + function_name = sys._getframe().f_code.co_name + logging.info(f'{function_name} start') + sql = f''' + select p.data_dt + ,p.user_id + ,coalesce(t.team_id, 0) as team_id + ,p.first_time + from + ( + select `uid` as user_id + ,min(date_format(server_time,'%%Y-%%m-%%d')) as data_dt + ,min(server_time) as first_time + from tamp_analysis.access_history_log + where server_time between %s and %s + and env = 'PCManager' + and event_type = '1002' + and `uid` <> '' + and `uid` not in ({','.join(["'%s'" % item['user_id'] for item in exist_users_dict])}) + group by `uid` + ) p + left join tamp_analysis.user_info_view t + on p.user_id = t.user_id + ''' + new_users_dict = fetch_all(sql, (start_time, end_time)) + logging.info(f'{function_name} success') + return new_users_dict + + +if __name__ == '__main__': + import datetime + begin = datetime.date(2021, 9, 18) + end = datetime.date(2021, 9, 22) + data_dt = begin + delta = datetime.timedelta(days=1) + while data_dt <= end: + print(data_dt.strftime("%Y-%m-%d")) + dwd_user_login_pc_record(data_dt) + data_dt += delta diff --git a/edw/dwd/user/dwd_user_login_phone_mode.py b/edw/dwd/user/dwd_user_login_phone_mode.py index 0f6085f83326e705a93e3e813369ea1643217176..fddf84ddd0056a831b956d0052a8c008e75f6dd6 100644 --- a/edw/dwd/user/dwd_user_login_phone_mode.py +++ b/edw/dwd/user/dwd_user_login_phone_mode.py @@ -5,15 +5,18 @@ 用户登录手机型å·,定时任务,æ¯2ä¸ªå°æ—¶è¿è¡Œä¸€æ¬¡ ''' import logging +import os import sys from common.file_uitil import get_file_path, get_file_name -from common.mysql_uitl import fetch_all, 
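# Illustrative sketch (not part of this patch): query_new_pc_users() above splices
# user ids into the SQL text for its NOT IN clause, so an empty exclusion list yields
# invalid SQL ("not in ()") and the values bypass driver-side escaping. One way to
# stay on pymysql-style %s placeholders, as the other predicates here already do;
# `build_not_in` is a hypothetical helper name.
def build_not_in(column, values):
    # Returns (sql_fragment, params); an empty list adds no condition at all.
    if not values:
        return '', []
    placeholders = ','.join(['%s'] * len(values))
    return f' and {column} not in ({placeholders})', list(values)


clause, params = build_not_in('`uid`', ['u1', 'u2'])
assert clause == ' and `uid` not in (%s,%s)' and params == ['u1', 'u2']
assert build_not_in('`uid`', []) == ('', [])
# In query_new_pc_users this fragment would be appended to the query string and the
# parameters passed as fetch_all(sql, [start_time, end_time] + params).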
insert_batch +from common.mysql_uitl import fetch_all, insert_batch, save_result, save_etl_log +from common.time_util import now_str, YMDHMS_FORMAT logging.basicConfig(format="%(asctime)s %(name)s:%(levelname)s:%(message)s", datefmt="%Y-%m-%d %H:%M:%S", level=logging.INFO) # file_path = get_file_path() # file_name = get_file_name() file_name = sys.argv[0] +task_file = os.path.split(__file__)[-1].split(".")[0] def dwd_user_login_phone_mode(data_dt): @@ -21,6 +24,9 @@ def dwd_user_login_phone_mode(data_dt): end_time = str(data_dt) + ' 23:59:59' login_phone_dict = query_user_login_phone_mode(start_time, end_time) save_dwd_user_login_phone_mode(login_phone_dict) + row = save_result('tamp_data_dwd', 'dwd_user_login_phone_mode', login_phone_dict, file_name) + now_time = now_str(YMDHMS_FORMAT) + save_etl_log('tamp_data_dwd', 'dwd_user_login_phone_mode', data_dt, row, 'done', task_file, now_time) def query_user_login_phone_mode(start_time, end_time): diff --git a/edw/dwd/user/dwd_user_share_event.py b/edw/dwd/user/dwd_user_share_event.py index 4190784e3b540b0f9f7c965d5a59ad2da03bda0b..94923d0cc057c345378659f65a50575fef8d273c 100644 --- a/edw/dwd/user/dwd_user_share_event.py +++ b/edw/dwd/user/dwd_user_share_event.py @@ -28,12 +28,14 @@ 4043 分享产å“首页 äº§å“ ''' import logging +import os import sys -from common.mysql_uitl import fetch_all, insert_batch - +from common.mysql_uitl import fetch_all, insert_batch, save_result, save_etl_log +from common.time_util import now_str, YMDHMS_FORMAT logging.basicConfig(format="%(asctime)s %(name)s:%(levelname)s:%(message)s", datefmt="%Y-%m-%d %H:%M:%S", level=logging.INFO) file_name = sys.argv[0] +task_file = os.path.split(__file__)[-1].split(".")[0] def dwd_user_share_event(data_dt): @@ -41,7 +43,10 @@ def dwd_user_share_event(data_dt): end_time = str(data_dt) + ' 23:59:59' share_dict = query_share_event() user_share_event_dict = query_dwd_user_share_event(share_dict, start_time, end_time) - save_dwd_user_share_event(user_share_event_dict) + # save_dwd_user_share_event(user_share_event_dict) + row = save_result('tamp_data_dwd', 'dwd_user_share_event', user_share_event_dict, file_name) + now_time = now_str(YMDHMS_FORMAT) + save_etl_log('tamp_data_dwd', 'dwd_user_share_event', data_dt, row, 'done', task_file, now_time) def query_share_event(): diff --git a/edw/dwd/user/dwd_user_studio_add_content.py b/edw/dwd/user/dwd_user_studio_add_content.py new file mode 100644 index 0000000000000000000000000000000000000000..55b4dc4398d572e43bbb8bf0fbcf57a712c6fac6 --- /dev/null +++ b/edw/dwd/user/dwd_user_studio_add_content.py @@ -0,0 +1,330 @@ +# -*- coding: utf-8 -*- + +''' +ç”¨æˆ·æ·»åŠ å·¥ä½œå®¤è®°å½•,æ•°æ®é‡ä¸å¤§ï¼Œå…¨é‡æ›´æ–° +''' +import logging +import os +import sys + +from common.mysql_uitl import save_result, fetch_all, save_etl_log +from common.time_util import now_str, YMDHMS_FORMAT + + +logging.basicConfig(format="%(asctime)s %(name)s:%(levelname)s:%(message)s", datefmt="%Y-%m-%d %H:%M:%S", level=logging.INFO) +file_name = sys.argv[0] +task_file = os.path.split(__file__)[-1].split(".")[0] + + +def dwd_user_studio_add_content(data_dt): + # 公募基金 + public_fund_dict = query_studio_add_public_fund() + row1 = save_result('tamp_data_dwd', 'dwd_user_studio_add_content', public_fund_dict, file_name) + + # ç§å‹ŸåŸºé‡‘ + private_equity_dict = query_studio_add_private_equity() + row2 = save_result('tamp_data_dwd', 'dwd_user_studio_add_content', private_equity_dict, file_name) + + # 白åå•基金 + tamp_fund_dict = query_studio_add_tamp_fund() + row3 = save_result('tamp_data_dwd', 
'dwd_user_studio_add_content', tamp_fund_dict, file_name) + + # ç§æœ‰åŸºé‡‘ + private_fund_dict = query_studio_add_private_fund() + row4 = save_result('tamp_data_dwd', 'dwd_user_studio_add_content', private_fund_dict, file_name) + # + # æ— å‡€å€¼äº§å“ + ifa_nonav_dcit = query_studio_add_ifa_nonav() + row5 = save_result('tamp_data_dwd', 'dwd_user_studio_add_content', ifa_nonav_dcit, file_name) + + # ç›´æ’ + live_dict = query_studio_add_live() + row6 = save_result('tamp_data_dwd', 'dwd_user_studio_add_content', live_dict, file_name) + + # 课程 + course_dict = query_studio_add_course() + row7 = save_result('tamp_data_dwd', 'dwd_user_studio_add_content', course_dict, file_name) + + # çŸè§†é¢‘ + short_video_dict = query_studio_add_short_video() + row8 = save_result('tamp_data_dwd', 'dwd_user_studio_add_content', short_video_dict, file_name) + + # æŽ¢æ™®æ–‡ç« + tamp_article_dict = query_studio_add_tamp_article() + row9 = save_result('tamp_data_dwd', 'dwd_user_studio_add_content', tamp_article_dict, file_name) + + # ç§æœ‰æ–‡ç« + self_article_dict = query_studio_add_self_article() + row10 = save_result('tamp_data_dwd', 'dwd_user_studio_add_content', self_article_dict, file_name) + + row = row1 + row2 + row3 + row4 + row5 + row6 + row7 + row8 + row9 + row10 + now_time = now_str(YMDHMS_FORMAT) + save_etl_log('tamp_data_dwd', 'dwd_user_studio_add_content', data_dt, row, 'done', task_file, now_time) + + +def query_studio_add_public_fund(): + '''æ·»åŠ å…¬å‹ŸåŸºé‡‘''' + function_name = sys._getframe().f_code.co_name + logging.info(f'{function_name} start') + sql = ''' + select p.id + ,p.user_id + ,'public_fund' as res_type + ,p.target_id as res_id + ,t.fund_name as res_name + ,p.course_packageId as course_id + ,'' as course_name + ,p.create_time + ,p.update_time + ,p.delete_tag + from tamp_user.user_poster_attachement p + inner join tamp_fund.tx_fund_info t + on p.target_id = t.id + where p.`type` = 'product' + and p.sub_type = '0' + ''' + public_fund_dict = fetch_all(sql, None) + logging.info(f'{function_name} success') + return public_fund_dict + + +def query_studio_add_private_equity(): + '''æ·»åŠ ç§å‹ŸåŸºé‡‘''' + function_name = sys._getframe().f_code.co_name + logging.info(f'{function_name} start') + sql = ''' + select p.id + ,p.user_id + ,'private_equity' as res_type + ,p.target_id as res_id + ,t.fund_name as res_name + ,p.course_packageId as course_id + ,'' as course_name + ,p.create_time + ,p.update_time + ,p.delete_tag + from tamp_user.user_poster_attachement p + inner join tamp_fund.fund_info t + on p.target_id = t.id + where p.`type` = 'product' + and p.sub_type = '1' + ''' + private_equity_dict = fetch_all(sql, None) + logging.info(f'{function_name} success') + return private_equity_dict + + +def query_studio_add_tamp_fund(): + '''æ·»åŠ ç™½åå•基金''' + function_name = sys._getframe().f_code.co_name + logging.info(f'{function_name} start') + sql = ''' + select p.id + ,p.user_id + ,'tamp_fund' res_type + ,p.target_id as res_id + ,t.fund_name as res_name + ,p.course_packageId as course_id + ,'' as course_name + ,p.create_time + ,p.update_time + ,p.delete_tag + from tamp_user.user_poster_attachement p + inner join tamp_product.fund_info t + on p.target_id = t.id + where p.`type` = 'product' + and p.sub_type = '2' + ''' + tamp_fund_dict = fetch_all(sql, None) + logging.info(f'{function_name} success') + return tamp_fund_dict + + +def query_studio_add_private_fund(): + '''æ·»åŠ ç§æœ‰åŸºé‡‘''' + function_name = sys._getframe().f_code.co_name + logging.info(f'{function_name} start') + sql = ''' + select 
p.id + ,p.user_id + ,'private_fund' as res_type + ,p.target_id as res_id + ,t.fund_name as res_name + ,p.course_packageId as course_id + ,'' as course_name + ,p.create_time + ,p.update_time + ,p.delete_tag + from tamp_user.user_poster_attachement p + inner join tamp_fund.ifa_imported_fund_info t + on p.target_id = t.id + where p.`type` = 'product' + and p.sub_type = '3' + ''' + public_fund_dict = fetch_all(sql, None) + logging.info(f'{function_name} success') + return public_fund_dict + + +def query_studio_add_ifa_nonav(): + '''æ·»åŠ æ— å‡€å€¼äº§å“''' + function_name = sys._getframe().f_code.co_name + logging.info(f'{function_name} start') + sql = ''' + select p.id + ,p.user_id + ,'ifa_nonav' as res_type + ,p.target_id as res_id + ,t.fund_name as res_name + ,p.course_packageId as course_id + ,'' as course_name + ,p.create_time + ,p.update_time + ,p.delete_tag + from tamp_user.user_poster_attachement p + inner join tamp_fund.ifa_imported_nonav_fund t + on p.target_id = t.id + where p.`type` = 'product' + and p.sub_type = '4' + ''' + ifa_nonav_dict = fetch_all(sql, None) + logging.info(f'{function_name} success') + return ifa_nonav_dict + + +def query_studio_add_live(): + '''æ·»åŠ ç›´æ’''' + function_name = sys._getframe().f_code.co_name + logging.info(f'{function_name} start') + sql = ''' + select p.id + ,p.user_id + ,'live' as res_type + ,p.target_id as res_id + ,t.zt_name as res_name + ,p.course_packageId as course_id + ,'' as course_name + ,p.create_time + ,p.update_time + ,p.delete_tag + from tamp_user.user_poster_attachement p + inner join tamp_zhibo.zhibo_theme t + on p.target_id = t.id + where p.`type` = 'zhibo' + ''' + live_dict = fetch_all(sql, None) + logging.info(f'{function_name} success') + return live_dict + + +def query_studio_add_course(): + '''æ·»åŠ è¯¾ç¨‹''' + function_name = sys._getframe().f_code.co_name + logging.info(f'{function_name} start') + sql = ''' + select p.id + ,p.user_id + ,'course' as res_type + ,p.target_id as res_id + ,t.title as res_name + ,p.course_packageId as course_id + ,q.main_title as course_name + ,p.create_time + ,p.update_time + ,p.delete_tag + from tamp_user.user_poster_attachement p + inner join tamp_course.course t + on p.target_id = t.id + inner join tamp_course.course_package q + on p.course_packageId = q.id + where p.`type` = 'course' + ''' + course_dict = fetch_all(sql, None) + logging.info(f'{function_name} success') + return course_dict + + +def query_studio_add_short_video(): + '''æ·»åŠ çŸè§†é¢‘''' + function_name = sys._getframe().f_code.co_name + logging.info(f'{function_name} start') + sql = ''' + select p.id + ,p.user_id + ,'short_video' as res_type + ,p.target_id as res_id + ,t.res_name + ,p.course_packageId as course_id + ,'' as course_name + ,p.create_time + ,p.update_time + ,p.delete_tag + from tamp_user.user_poster_attachement p + inner join tamp_analysis.short_video_view t + on p.target_id = t.res_id + where p.`type` = 'shortVideo' + ''' + short_video_dict = fetch_all(sql, None) + logging.info(f'{function_name} success') + return short_video_dict + + +def query_studio_add_tamp_article(): + '''æ·»åŠ æŽ¢æ™®æ–‡ç« ''' + function_name = sys._getframe().f_code.co_name + logging.info(f'{function_name} start') + sql = ''' + select p.id + ,p.user_id + ,case when t.ar_title is not null then 'tamp_article' + when q.af_title is not null then 'self_article' + else '' + end as res_type + ,p.target_id as res_id + ,coalesce(t.ar_title,q.af_title) as res_name + ,p.course_packageId as course_id + ,'' as course_name + ,p.create_time + 
,p.update_time + ,p.delete_tag + from tamp_user.user_poster_attachement p + left join tamp_user.article t + on p.target_id = t.id + left join tamp_user.friend_article q + on p.target_id = q.id + where p.`type` = 'article' + and p.sub_type = 'tamp' + ''' + tamp_article_dict = fetch_all(sql, None) + logging.info(f'{function_name} success') + return tamp_article_dict + + +def query_studio_add_self_article(): + '''æ·»åŠ ç§æœ‰æ–‡ç« ''' + function_name = sys._getframe().f_code.co_name + logging.info(f'{function_name} start') + sql = ''' + select p.id + ,p.user_id + ,'self_article' as res_type + ,p.target_id as res_id + ,t.af_title as res_name + ,p.course_packageId as course_id + ,'' as course_name + ,p.create_time + ,p.update_time + ,p.delete_tag + from tamp_user.user_poster_attachement p + inner join tamp_user.friend_article t + on p.target_id = t.id + where p.`type` = 'article' + and p.sub_type = 'self' + ''' + self_article_dict = fetch_all(sql, None) + logging.info(f'{function_name} success') + return self_article_dict + + +if __name__ == '__main__': + dwd_user_studio_add_content() diff --git a/edw/dwd/user/dwd_user_visitor_clues.py b/edw/dwd/user/dwd_user_visitor_clues.py index 03b9546d82a6315515d72cb34c3659a4042c2ac7..504ea8381f4941d92ebbefa7248c670eddc84a86 100644 --- a/edw/dwd/user/dwd_user_visitor_clues.py +++ b/edw/dwd/user/dwd_user_visitor_clues.py @@ -6,9 +6,11 @@ ''' import logging +import os import sys -from common.mysql_uitl import fetch_all, insert_batch, save_result +from common.mysql_uitl import fetch_all, insert_batch, save_result, save_etl_log +from common.time_util import YMDHMS_FORMAT, now_str ''' p1005 ç›´æ’详情页 @@ -40,13 +42,17 @@ p13502 个人主页(访问å‘圈å用户的主页) logging.basicConfig(format="%(asctime)s %(name)s:%(levelname)s:%(message)s", datefmt="%Y-%m-%d %H:%M:%S", level=logging.INFO) file_name = sys.argv[0] +task_file = os.path.split(__file__)[-1].split(".")[0] def dwd_user_visitor_clues(data_dt): start_time = str(data_dt) + ' 00:00:00' end_time = str(data_dt) + ' 23:59:59' visitor_record_dict = query_dwd_user_visitor_record(start_time, end_time) - save_dwd_user_visit_clues(visitor_record_dict) + # save_dwd_user_visit_clues(visitor_record_dict) + row = save_result('tamp_data_dwd', 'dwd_user_visit_clues', visitor_record_dict, file_name) + now_time = now_str(YMDHMS_FORMAT) + save_etl_log('tamp_data_dwd', 'dwd_user_visit_clues', data_dt, row, 'done', task_file, now_time) def query_dwd_user_visitor_record(start_time, end_time): diff --git a/edw/dwd/user/dwd_user_watch_live.py b/edw/dwd/user/dwd_user_watch_live.py index 2dc30bd00686322ee634f16950f7f3f23608abb5..4b097488299416bfe46d84c8eecbefb83e01b857 100644 --- a/edw/dwd/user/dwd_user_watch_live.py +++ b/edw/dwd/user/dwd_user_watch_live.py @@ -8,19 +8,23 @@ import logging import os import sys from common.file_uitil import get_file_path, get_file_name -from common.mysql_uitl import fetch_all, insert_batch - +from common.mysql_uitl import fetch_all, insert_batch, save_result, save_etl_log +from common.time_util import YMDHMS_FORMAT, now_str logging.basicConfig(format="%(asctime)s %(name)s:%(levelname)s:%(message)s", datefmt="%Y-%m-%d %H:%M:%S", level=logging.INFO) # file_path = get_file_path() file_name = sys.argv[0] +task_file = os.path.split(__file__)[-1].split(".")[0] def dwd_user_watch_live(data_dt): start_time = str(data_dt) + ' 00:00:00' end_time = str(data_dt) + ' 23:59:59' user_watch_dict = query_dwd_user_watch_live(start_time, end_time) - save_dwd_user_watch_live(user_watch_dict) + # save_dwd_user_watch_live(user_watch_dict) 
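# Illustrative sketch (not part of this patch): dwd_user_studio_add_content() above
# repeats the query -> save_result pair ten times and sums row1..row10 by hand.
# Inside edw/dwd/user/dwd_user_studio_add_content.py the same flow could iterate over
# the query functions; the function names below are the ones defined in that module,
# while the loop variant itself is only a suggestion.
STUDIO_QUERIES = [
    query_studio_add_public_fund, query_studio_add_private_equity,
    query_studio_add_tamp_fund, query_studio_add_private_fund,
    query_studio_add_ifa_nonav, query_studio_add_live,
    query_studio_add_course, query_studio_add_short_video,
    query_studio_add_tamp_article, query_studio_add_self_article,
]


def dwd_user_studio_add_content_v2(data_dt):
    # Hypothetical variant: accumulate the total row count across all resource types.
    row = 0
    for query in STUDIO_QUERIES:
        row += save_result('tamp_data_dwd', 'dwd_user_studio_add_content', query(), file_name)
    save_etl_log('tamp_data_dwd', 'dwd_user_studio_add_content', data_dt, row, 'done',
                 task_file, now_str(YMDHMS_FORMAT))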
+ row = save_result('tamp_data_dwd', 'dwd_user_watch', user_watch_dict, file_name) + now_time = now_str(YMDHMS_FORMAT) + save_etl_log('tamp_data_dwd', 'dwd_user_watch', data_dt, row, 'done', task_file, now_time) def query_dwd_user_watch_live(start_time, end_time): diff --git a/edw/dwd/user/dwd_user_watch_short_video.py b/edw/dwd/user/dwd_user_watch_short_video.py index 81d7c7785b1724b56462b670c36557222407809e..5fd370b8404cf32933a3f50ee274cd667d6628f5 100644 --- a/edw/dwd/user/dwd_user_watch_short_video.py +++ b/edw/dwd/user/dwd_user_watch_short_video.py @@ -7,19 +7,21 @@ import logging import os import sys -from common.file_uitil import get_file_path, get_file_name -from common.mysql_uitl import fetch_all, insert_batch, save_result +from common.mysql_uitl import fetch_all, save_result, save_etl_log +from common.time_util import YMDHMS_FORMAT, now_str logging.basicConfig(format="%(asctime)s %(name)s:%(levelname)s:%(message)s", datefmt="%Y-%m-%d %H:%M:%S", level=logging.INFO) -# file_path = get_file_path() file_name = sys.argv[0] +task_file = os.path.split(__file__)[-1].split(".")[0] def dwd_user_watch_short_video(data_dt): start_time = str(data_dt) + ' 00:00:00' end_time = str(data_dt) + ' 23:59:59' watch_short_video_dict = query_dwd_user_watch_short_video(start_time, end_time) - save_result('tamp_data_dwd', 'dwd_user_watch_short_video', watch_short_video_dict, file_name) + row = save_result('tamp_data_dwd', 'dwd_user_watch_short_video', watch_short_video_dict, file_name) + now_time = now_str(YMDHMS_FORMAT) + save_etl_log('tamp_data_dwd', 'dwd_user_watch_short_video', data_dt, row, 'done', task_file, now_time) def query_dwd_user_watch_short_video(start_time, end_time): @@ -74,8 +76,8 @@ def query_dwd_user_watch_short_video(start_time, end_time): if __name__ == '__main__': import datetime - begin = datetime.date(2021, 4, 1) - end = datetime.date(2021, 9, 14) + begin = datetime.date(2021, 9, 15) + end = datetime.date(2021, 9, 21) data_dt = begin delta = datetime.timedelta(days=1) while data_dt <= end: diff --git a/edw/dws/user/dws_user_browse_fund.py b/edw/dws/user/dws_user_browse_fund.py index bedb899eade2de599a5c771f888e6e9d9e483de5..6114d27b528ee187d064f1cb9460acbeb6e05ac4 100644 --- a/edw/dws/user/dws_user_browse_fund.py +++ b/edw/dws/user/dws_user_browse_fund.py @@ -1,8 +1,10 @@ # -*- coding: utf-8 -*- import logging +import os import sys -from common.mysql_uitl import fetch_all, save_result, insert_batch, insert +from common.mysql_uitl import fetch_all, save_result, insert_batch, insert, save_etl_log +from common.time_util import now_str, YMDHMS_FORMAT ''' ç”¨æˆ·è§‚çœ‹ç›´æ’æ˜Žç»†æ•°æ®ç»Ÿè®¡,定时任务,æ¯2ä¸ªå°æ—¶è¿è¡Œä¸€æ¬¡ @@ -10,11 +12,14 @@ from common.mysql_uitl import fetch_all, save_result, insert_batch, insert ''' logging.basicConfig(format="%(asctime)s %(name)s:%(levelname)s:%(message)s", datefmt="%Y-%m-%d %H:%M:%S", level=logging.INFO) file_name = sys.argv[0] +task_file = os.path.split(__file__)[-1].split(".")[0] def dws_user_browse_fund(data_dt): browse_fund_dict = query_dws_user_browse_fund(data_dt) - save_result('tamp_data_dws', 'dws_user_browse_fund', browse_fund_dict, file_name) + row = save_result('tamp_data_dws', 'dws_user_browse_fund', browse_fund_dict, file_name) + now_time = now_str(YMDHMS_FORMAT) + save_etl_log('tamp_data_dws', 'dws_user_browse_fund', data_dt, row, 'done', task_file, now_time) def query_dws_user_browse_fund(data_dt): diff --git a/edw/dws/user/dws_user_clues.py b/edw/dws/user/dws_user_clues.py deleted file mode 100644 index 
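# Illustrative sketch (not part of this patch): the __main__ blocks in these modules
# all repeat the same begin/end/while backfill loop. A small driver covering that
# pattern, assuming only the standard library; `backfill` is a hypothetical name and
# the dates in the usage note are just examples.
import datetime


def backfill(task, begin, end):
    # Run task(data_dt) once per calendar day from begin to end, inclusive.
    data_dt = begin
    one_day = datetime.timedelta(days=1)
    while data_dt <= end:
        print(data_dt.strftime("%Y-%m-%d"))
        task(data_dt)
        data_dt += one_day


# e.g. backfill(dwd_user_watch_short_video, datetime.date(2021, 9, 15),
#               datetime.date(2021, 9, 21))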
9075b09ebf5256ed903a645239b8b5d249c6cce5..0000000000000000000000000000000000000000 --- a/edw/dws/user/dws_user_clues.py +++ /dev/null @@ -1,67 +0,0 @@ -# -*- coding: utf-8 -*- -import logging -import sys - -from common.mysql_uitl import fetch_all - -''' -用户分享事件统计,定时任务,æ¯2ä¸ªå°æ—¶è¿è¡Œä¸€æ¬¡ -çŽ°åœ¨åªæœ‰äº§å“/ç›´æ’/课程的线索 -''' -logging.basicConfig(format="%(asctime)s %(name)s:%(levelname)s:%(message)s", datefmt="%Y-%m-%d %H:%M:%S", level=logging.INFO) -file_name = sys.argv[0] - - - -def dws_user_clues(data_dt): - user_clues_dict = query_user_fund_clues() - save_user_clues(user_clues_dict) - - -def query_user_fund_clues(): - function_name = sys._getframe().f_code.co_name - logging.info(f'{function_name} start') - # 有的数æ®ï¼Œæœ‰è®¿é—®ï¼Œå´æ²¡æœ‰åˆ†äº« - # æ˜¯ä¸æ˜¯è¦å»ºè§†å›¾ - sql = ''' - select p.data_dt as share_dt - ,t.data_dt as visitor_dt - ,p.user_id - ,p.real_name - ,p.user_name - ,p.nickname - ,p.team_id - ,p.res_id - ,p.event_type - ,p.extra_id - ,p.source_user_id - ,p.source_uuid_id - ,p.local_time as share_time - ,t.local_time as visitor_time - from tamp_data_dwd.dwd_user_share_event p - left join tamp_data_dwd.dwd_user_visit_clues t - on p.source_user_id = t.source_user_id - and p.source_uuid_id = t.source_uuid_id - and p.res_id = t.res_id - ''' - user_clues_dict = fetch_all(sql, None) - logging.info(f'{function_name} success') - return user_clues_dict - - -def save_user_clues(user_clues_dict): - sql = ''' - ''' - -if __name__ == '__main__': - import datetime - begin = datetime.date(2021, 4, 1) - end = datetime.date(2021, 9, 14) - data_dt = begin - delta = datetime.timedelta(days=1) - while data_dt <= end: - print(data_dt.strftime("%Y-%m-%d")) - dws_user_clues(data_dt) - data_dt += delta - - diff --git a/edw/dws/user/dws_user_content_order.py b/edw/dws/user/dws_user_content_order.py index 05c1355c3c0f800fc6396771bcfecb86331a7a10..34bf38481aef89ba62052971cbbeba68e2c0a3e3 100644 --- a/edw/dws/user/dws_user_content_order.py +++ b/edw/dws/user/dws_user_content_order.py @@ -5,45 +5,53 @@ å†…å®¹è®¢å•æ•°æ®(å…¨é‡åŒæ¥ï¼Œè®¢å•é‡å¤šäº†ï¼Œå†ç”¨å¢žé‡),定时任务,æ¯10分钟è¿è¡Œä¸€æ¬¡ ''' import logging +import os import sys -from common.mysql_uitl import fetch_all, save_result +from common.mysql_uitl import fetch_all, save_result, save_etl_log +from common.time_util import now_str, YMDHMS_FORMAT + logging.basicConfig(format="%(asctime)s %(name)s:%(levelname)s:%(message)s", datefmt="%Y-%m-%d %H:%M:%S", level=logging.INFO) file_name = sys.argv[0] +task_file = os.path.split(__file__)[-1].split(".")[0] -def dws_user_content_order(): +def dws_user_content_order(data_dt): # æ–°è¯¾ç¨‹è®¢å• course_order_dict = query_dws_course_order() - save_result('tamp_data_dws', 'dws_user_content_order', course_order_dict, file_name) + row1 = save_result('tamp_data_dws', 'dws_user_content_order', course_order_dict, file_name) # è€è¯¾ç¨‹è®¢å• old_course_order_dict = query_dws_old_course_order() - save_result('tamp_data_dws', 'dws_user_content_order', old_course_order_dict, file_name) + row2 = save_result('tamp_data_dws', 'dws_user_content_order', old_course_order_dict, file_name) # è´ä¹°æŽ¢æ™®è´ recharge_order_dict = query_dws_recharge_order() - save_result('tamp_data_dws', 'dws_user_content_order', recharge_order_dict, file_name) + row3 = save_result('tamp_data_dws', 'dws_user_content_order', recharge_order_dict, file_name) # ç›´æ’è®¢å• live_order_dict = query_dws_live_order() - save_result('tamp_data_dws', 'dws_user_content_order', live_order_dict, file_name) + row4 = save_result('tamp_data_dws', 'dws_user_content_order', 
live_order_dict, file_name) # è´ä¹°æ ç›®è®¢å• column_order_dict = query_dws_column_order() - save_result('tamp_data_dws', 'dws_user_content_order', column_order_dict, file_name) + row5 = save_result('tamp_data_dws', 'dws_user_content_order', column_order_dict, file_name) # è´ä¹°æ–°è¯¾ç¨‹è¯¾ä»¶ course_ware_order_dict = query_dws_course_ware_order() - save_result('tamp_data_dws', 'dws_user_content_order', course_ware_order_dict, file_name) + row6 = save_result('tamp_data_dws', 'dws_user_content_order', course_ware_order_dict, file_name) # è´ä¹°é™„ä»¶ file_order_dict = query_dws_file_order() - save_result('tamp_data_dws', 'dws_user_content_order', file_order_dict, file_name) + row7 = save_result('tamp_data_dws', 'dws_user_content_order', file_order_dict, file_name) # çº¿ä¸‹æ´»åŠ¨è®¢å• offline_activity_order_dict = query_dws_offline_activity_order() - save_result('tamp_data_dws', 'dws_user_content_order', offline_activity_order_dict, file_name) + row8 = save_result('tamp_data_dws', 'dws_user_content_order', offline_activity_order_dict, file_name) + + row = row1 + row2 + row3 + row4 + row5 + row6 + row7 + row8 + now_time = now_str(YMDHMS_FORMAT) + save_etl_log('tamp_data_dws', 'dws_user_content_order', data_dt, row, 'done', task_file, now_time) def query_dws_course_order(): diff --git a/edw/dws/user/dws_user_first_login_client_time.py b/edw/dws/user/dws_user_first_login_client_time.py new file mode 100644 index 0000000000000000000000000000000000000000..41df51c8f4c6efd0efd7cc8361b6b5f9f6967f8a --- /dev/null +++ b/edw/dws/user/dws_user_first_login_client_time.py @@ -0,0 +1,85 @@ +# -*- coding: utf-8 -*- +''' +用户首次登录客户端时间,定时任务,æ¯å¤©è¿è¡Œä¸€æ¬¡ +''' +import logging +import sys +from common.mysql_uitl import fetch_all, insert_batch, save_result + +logging.basicConfig(format="%(asctime)s %(name)s:%(levelname)s:%(message)s", datefmt="%Y-%m-%d %H:%M:%S", level=logging.ERROR) +file_name = sys.argv[0] + + +def dws_user_first_login_client_time(data_dt): + login_time_dict = query_first_login_time(data_dt) + save_user_login_time(login_time_dict) + # update_user_name() + + +def query_first_login_time(data_dt): + function_name = sys._getframe().f_code.co_name + logging.info(f'{function_name} start') + sql = ''' + select user_id + ,real_name + ,user_name + ,nickname + ,team_id + ,min(if(env = 'ios',start_time,null)) as login_ios_time + ,min(if(env = 'android',start_time, null)) as login_android_time + ,min(if(env = 'PCManager',start_time, null)) as login_pc_time + ,min(if(env = 'wechat',start_time, null)) as login_wechat_time + ,min(if(env = 'xcx',start_time, null)) as login_xcx_time + from tamp_data_dwd.dwd_user_login_environment + where data_dt = %s + group by user_id, real_name, user_name, nickname, team_id + ''' + exist_users_dict = fetch_all(sql, data_dt) + logging.info(f'{function_name} success') + return exist_users_dict + + +def save_user_login_time(login_time_dict): + if login_time_dict: + function_name = sys._getframe().f_code.co_name + logging.info(f'{function_name} start') + database = 'tamp_data_dws' + table = 'dws_user_first_login_client_time' + result_table = database + '.' 
+ table + field_list = ['login_wechat_time', 'login_ios_time', 'login_android_time', 'login_pc_time', 'login_xcx_time'] + for field in field_list: + result_list = list() + client_dict_list = list() + for i in login_time_dict: + if i[field]: + result_list.append([i['real_name'], i['user_name'], i['nickname'], i['team_id'], i[field], i['user_id']]) + client_dict_list.append(i) + sql = f'''select user_id + ,real_name + ,user_name + ,nickname + ,team_id + ,{field} from {result_table} + where user_id in ({','.join(["'%s'" % item['user_id'] for item in login_time_dict])}) + and {field} is not null ''' + exist_users_dict = fetch_all(sql, None) + update = f'''update {result_table} set real_name = (%s), user_name = (%s), nickname = (%s), team_id = (%s), + {field} = (%s) where user_id = (%s)''' + # å·²ç»å˜åœ¨çš„用户,则更新å¦å¤–一个客户端登录的时间 + insert_batch(update, result_list) + # ä¸å˜åœ¨çš„用户,则æ’å…¥æ–°æ•°æ® + not_exist_users_dict = [x for x in client_dict_list if x not in exist_users_dict] + save_result(database, table, not_exist_users_dict, file_name) + logging.info(f'{function_name} success') + + +if __name__ == '__main__': + import datetime + begin = datetime.date(2021, 9, 17) + end = datetime.date(2021, 9, 22) + data_dt = begin + delta = datetime.timedelta(days=1) + while data_dt <= end: + print(data_dt.strftime("%Y-%m-%d")) + dws_user_first_login_client_time(data_dt) + data_dt += delta diff --git a/edw/dws/user/dws_user_learn_course.py b/edw/dws/user/dws_user_learn_course.py index c4102eabb81477ab41162833ee8cb5a2887bd088..2ed4923255c21de7102e0e96a57f812f84c31074 100644 --- a/edw/dws/user/dws_user_learn_course.py +++ b/edw/dws/user/dws_user_learn_course.py @@ -1,8 +1,10 @@ # -*- coding: utf-8 -*- import logging +import os import sys -from common.mysql_uitl import fetch_all, save_result, insert_batch, insert +from common.mysql_uitl import fetch_all, save_result, insert_batch, insert, save_etl_log +from common.time_util import now_str, YMDHMS_FORMAT ''' 用户å¦ä¹ 课程明细/汇总数æ®ç»Ÿè®¡,定时任务,æ¯2ä¸ªå°æ—¶è¿è¡Œä¸€æ¬¡ @@ -12,6 +14,7 @@ from common.mysql_uitl import fetch_all, save_result, insert_batch, insert ''' logging.basicConfig(format="%(asctime)s %(name)s:%(levelname)s:%(message)s", datefmt="%Y-%m-%d %H:%M:%S", level=logging.INFO) file_name = sys.argv[0] +task_file = os.path.split(__file__)[-1].split(".")[0] def dws_user_learn_course(data_dt): @@ -20,8 +23,9 @@ def dws_user_learn_course(data_dt): learn_course_dict = query_dws_user_learn_course(data_dt) merge_share_course_dict = merge_share_course(share_single_course_dict, share_course_dict) merge_course_result_dict = merge_course_dict(learn_course_dict, merge_share_course_dict) - save_result('tamp_data_dws', 'dws_user_learn_course', merge_course_result_dict, file_name) - + row = save_result('tamp_data_dws', 'dws_user_learn_course', merge_course_result_dict, file_name) + now_time = now_str(YMDHMS_FORMAT) + save_etl_log('tamp_data_dws', 'dws_user_learn_course', data_dt, row, 'done', task_file, now_time) # 分享å•节课程 def query_dws_user_share_single_course_num(data_dt): diff --git a/edw/dws/user/dws_user_learn_total_dur.py b/edw/dws/user/dws_user_learn_total_dur.py index d724bde03c1238b957d364b22423092ba33fbc00..62267b70c7712e69e81cb03e25a9f0660aae3ae4 100644 --- a/edw/dws/user/dws_user_learn_total_dur.py +++ b/edw/dws/user/dws_user_learn_total_dur.py @@ -6,19 +6,25 @@ ''' import json import logging +import os import sys -from common.file_uitil import get_file_path, get_file_name -from common.mysql_uitl import fetch_all, insert_batch + +from common.mysql_uitl import 
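# Illustrative sketch (not part of this patch): save_user_login_time() above selects
# existing rows, batch-updates them and inserts the rest for each client column. If
# user_id carries a UNIQUE or PRIMARY KEY on the target table (an assumption, not
# verified here), MySQL's INSERT ... ON DUPLICATE KEY UPDATE can record only the
# first observed login time per client in a single statement, reusing insert_batch()
# the way this patch already does; `upsert_first_login` is a hypothetical helper name.
from common.mysql_uitl import insert_batch


def upsert_first_login(result_table, field, rows):
    # rows: list of (user_id, first_login_time) tuples for one client column.
    # coalesce keeps an already-recorded time and only fills the column when NULL.
    sql = f'''insert into {result_table} (user_id, {field}) values (%s, %s)
              on duplicate key update {field} = coalesce({field}, values({field}))'''
    return insert_batch(sql, rows)


# e.g. upsert_first_login('tamp_data_dws.dws_user_first_login_client_time',
#                         'login_ios_time', [('u1', '2021-09-17 08:30:00')])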
fetch_all, insert_batch, save_result, save_etl_log +from common.time_util import now_str, YMDHMS_FORMAT logging.basicConfig(format="%(asctime)s %(name)s:%(levelname)s:%(message)s", datefmt="%Y-%m-%d %H:%M:%S", level=logging.INFO) file_name = sys.argv[0] +task_file = os.path.split(__file__)[-1].split(".")[0] -def dws_user_learn_total_dur(): +def dws_user_learn_total_dur(data_dt): user_learn_total_dur_dict = query_dws_user_learn_total_dur() - save_dws_user_learn_total_dur(user_learn_total_dur_dict) + # save_dws_user_learn_total_dur(user_learn_total_dur_dict) + row = save_result('tamp_data_dws', 'dws_user_learn_total_dur', user_learn_total_dur_dict, file_name) + now_time = now_str(YMDHMS_FORMAT) + save_etl_log('tamp_data_dws', 'dws_user_learn_total_dur', data_dt, row, 'done', task_file, now_time) # æ•°æ®é‡å¤§é‡ï¼Œéœ€è¦ç”¨å¢žåŠ çš„æ–¹å¼æŸ¥è¯¢ï¼Œå¢žåŠ ä¸€å¼ åŽ†å²ç»Ÿè®¡è®°å½•表,增数æ®+åŽ†å²æ•°æ®=æ±‡æ€»æ•°æ® diff --git a/edw/dws/user/dws_user_login_environment.py b/edw/dws/user/dws_user_login_environment.py index dcbe3fa0cbcb745222710ac345141ccc81dee245..6e615468bf9ac8b247d593885413c56b47e66759 100644 --- a/edw/dws/user/dws_user_login_environment.py +++ b/edw/dws/user/dws_user_login_environment.py @@ -5,19 +5,25 @@ 用户登录环境,定时任务,æ¯2ä¸ªå°æ—¶è¿è¡Œä¸€æ¬¡ ''' import logging +import os import sys -from common.mysql_uitl import fetch_all, insert_batch, update_batch, fetch_all_list +from common.mysql_uitl import fetch_all, insert_batch, update_batch, fetch_all_list, save_result, save_etl_log +from common.time_util import now_str, YMDHMS_FORMAT logging.basicConfig(format="%(asctime)s %(name)s:%(levelname)s:%(message)s", datefmt="%Y-%m-%d %H:%M:%S", level=logging.INFO) file_name = sys.argv[0] +task_file = os.path.split(__file__)[-1].split(".")[0] -def dws_user_login_environment(): +def dws_user_login_environment(data_dt): login_env_dict = query_dws_user_login_env() - save_dws_user_login_environment(login_env_dict) + # save_dws_user_login_environment(login_env_dict) + row = save_result('tamp_data_dws', 'dws_user_login_environment', login_env_dict, file_name) dws_user_recent_login_env_list = query_dws_user_recent_login_env() update_dws_user_recent_login_env(dws_user_recent_login_env_list) + now_time = now_str(YMDHMS_FORMAT) + save_etl_log('tamp_data_dws', 'dws_user_login_environment', data_dt, row, 'done', task_file, now_time) def query_dws_user_login_env(): diff --git a/edw/dws/user/dws_user_login_phone_mode.py b/edw/dws/user/dws_user_login_phone_mode.py index 89c2ff839101893f0e05bf58baac6b822b855748..9870c932caff421eac4d8ea92f48f5d543df8493 100644 --- a/edw/dws/user/dws_user_login_phone_mode.py +++ b/edw/dws/user/dws_user_login_phone_mode.py @@ -5,18 +5,25 @@ 用户登录环境,定时任务,æ¯2ä¸ªå°æ—¶è¿è¡Œä¸€æ¬¡ ''' import logging +import os import sys -from common.file_uitil import get_file_path, get_file_name -from common.mysql_uitl import fetch_all, insert_batch, fetch_all_list + +from common.mysql_uitl import fetch_all, insert_batch, save_result, save_etl_log +from common.time_util import now_str, YMDHMS_FORMAT + logging.basicConfig(format="%(asctime)s %(name)s:%(levelname)s:%(message)s", datefmt="%Y-%m-%d %H:%M:%S", level=logging.INFO) file_name = sys.argv[0] +task_file = os.path.split(__file__)[-1].split(".")[0] -def dws_user_login_phone_mode(): +def dws_user_login_phone_mode(data_dt): login_phone_mode_dict = query_user_use_commonly_phone() - save_dws_user_login_phone_mode(login_phone_mode_dict) + # save_dws_user_login_phone_mode(login_phone_mode_dict) + row = save_result('tamp_data_dws', 'dws_user_login_phone_mode', 
login_phone_mode_dict, file_name) + now_time = now_str(YMDHMS_FORMAT) + save_etl_log('tamp_data_dws', 'dws_user_login_phone_mode', data_dt, row, 'done', task_file, now_time) def query_user_use_commonly_phone(): diff --git a/edw/dws/user/dws_user_login_top_area.py b/edw/dws/user/dws_user_login_top_area.py index 4d043b4cffbfa071dfa7cfe756cb247a5954661a..d2378b55716e76bec25346a6c1985f52313228a0 100644 --- a/edw/dws/user/dws_user_login_top_area.py +++ b/edw/dws/user/dws_user_login_top_area.py @@ -6,9 +6,13 @@ ''' import json import logging +import os import sys -from common.mysql_uitl import fetch_all, insert_batch +from common.mysql_uitl import fetch_all, insert_batch, save_result, save_etl_log +from datetime import datetime + +from common.time_util import YMDHMS_FORMAT, now_str ''' 用户最近60天登陆最多的地区 @@ -16,17 +20,19 @@ from common.mysql_uitl import fetch_all, insert_batch logging.basicConfig(format="%(asctime)s %(name)s:%(levelname)s:%(message)s", datefmt="%Y-%m-%d %H:%M:%S", level=logging.INFO) file_name = sys.argv[0] +task_file = os.path.split(__file__)[-1].split(".")[0] def dws_user_login_top_area(data_dt): - # 计算åç§»é‡ - offset = datetime.timedelta(days=-60) - # èŽ·å–æƒ³è¦çš„æ—¥æœŸçš„æ—¶é—´ - start_date = (data_dt + offset).strftime('%Y-%m-%d') + start_date = (datetime.datetime.strptime(str(data_dt), "%Y-%m-%d") - datetime.timedelta(days=60)).strftime("%Y-%m-%d") end_date = data_dt login_area_dict = query_login_area(start_date, end_date) login_top_area_dict = get_top_area(login_area_dict) - save_dws_user_login_area(login_top_area_dict) + # row = save_dws_user_login_area(login_top_area_dict) + row = save_result('tamp_data_dws', 'dws_user_login_top_area', login_top_area_dict, file_name) + now_time = now_str(YMDHMS_FORMAT) + save_etl_log('tamp_data_dws', 'dws_user_login_top_area', data_dt, row, 'done', task_file, now_time) + def query_login_area(start_date, end_date): @@ -51,7 +57,6 @@ def query_login_area(start_date, end_date): ) p group by p.user_id ''' - print(start_date, end_date) login_area_dict = fetch_all(sql, (start_date, end_date)) logging.info(f'query_login_area success') return login_area_dict @@ -92,18 +97,20 @@ def save_dws_user_login_area(ret): values.append(tuple(i.values())) sql = f'''replace into tamp_data_dws.dws_user_login_top_area ( {fields} ) values ( {place_holder} )''' rs = insert_batch(sql, values) + logging.info(str(rs)) # if rs == row or rs == 2 * row: # å› ä¸ºè¿™é‡Œç”¨çš„æ˜¯replace if rs >= row: # å› ä¸ºæœ‰éƒ¨åˆ†æ•°æ®æ˜¯è¦†ç›– logging.info(f'save_dws_user_login_area success {row}') else: logging.error(f'save_dws_user_login_area error æ•°æ®ä¸º:{row}行,æ’å…¥æˆåŠŸä¸º:{rs} 行 执行程åºä¸ºï¼š{file_name}') + return row if __name__ == '__main__': # dws_user_login_area() import datetime - begin = datetime.date(2021, 4, 1) - end = datetime.date(2021, 9, 14) + begin = datetime.date(2021, 9, 17) + end = datetime.date(2021, 9, 18) data_dt = begin delta = datetime.timedelta(days=1) while data_dt <= end: diff --git a/edw/dws/user/dws_user_visitor_clues.py b/edw/dws/user/dws_user_visitor_clues.py deleted file mode 100644 index c27a534621ec97bf92abea22008d70a96853f7ae..0000000000000000000000000000000000000000 --- a/edw/dws/user/dws_user_visitor_clues.py +++ /dev/null @@ -1,25 +0,0 @@ -# -*- coding: utf-8 -*- -import logging -import sys - -''' -访问线索统计 -''' -logging.basicConfig(format="%(asctime)s %(name)s:%(levelname)s:%(message)s", datefmt="%Y-%m-%d %H:%M:%S", level=logging.INFO) -file_name = sys.argv[0] - - -def dws_user_visitor_clues(data_dt): - pass - - -if __name__ == '__main__': - 
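# Illustrative sketch (not part of this patch): dws_user_login_top_area() above turns
# str(data_dt) back into a datetime just to step 60 days back. When data_dt is already
# a datetime.date, as in the __main__ backfill loops in these modules, the trailing
# 60-day window can be computed directly; `sixty_day_window` is a hypothetical name.
import datetime


def sixty_day_window(data_dt):
    # Returns (start_date, end_date) as ISO strings covering the trailing 60 days.
    start = data_dt - datetime.timedelta(days=60)
    return start.isoformat(), data_dt.isoformat()


assert sixty_day_window(datetime.date(2021, 9, 17)) == ('2021-07-19', '2021-09-17')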
import datetime - begin = datetime.date(2021, 9, 7) - end = datetime.date(2021, 9, 7) - data_dt = begin - delta = datetime.timedelta(days=1) - while data_dt <= end: - print(data_dt.strftime("%Y-%m-%d")) - dws_user_visitor_clues(data_dt) - data_dt += delta diff --git a/edw/dws/user/dws_user_watch_live.py b/edw/dws/user/dws_user_watch_live.py index 891fb428a548dfd4e22a7a6992d6e27243dcbf53..501624cec10285327f515c6e2d1e5dadd2d8d737 100644 --- a/edw/dws/user/dws_user_watch_live.py +++ b/edw/dws/user/dws_user_watch_live.py @@ -1,8 +1,10 @@ # -*- coding: utf-8 -*- import logging +import os import sys -from common.mysql_uitl import fetch_all, save_result, insert_batch, insert +from common.mysql_uitl import fetch_all, save_result, insert_batch, insert, save_etl_log +from common.time_util import now, now_str, YMDHMS_FORMAT ''' ç”¨æˆ·è§‚çœ‹ç›´æ’æ˜Žç»†æ•°æ®ç»Ÿè®¡,定时任务,æ¯2ä¸ªå°æ—¶è¿è¡Œä¸€æ¬¡ @@ -10,13 +12,18 @@ from common.mysql_uitl import fetch_all, save_result, insert_batch, insert ''' logging.basicConfig(format="%(asctime)s %(name)s:%(levelname)s:%(message)s", datefmt="%Y-%m-%d %H:%M:%S", level=logging.INFO) file_name = sys.argv[0] +task_file = os.path.split(__file__)[-1].split(".")[0] + def dws_user_watch_live(data_dt): share_live_dict = query_dws_user_share_live_num(data_dt) watch_live_dict = query_dws_user_watch_live(data_dt) merge_live_result_dict = merge_live_dict(watch_live_dict, share_live_dict) - save_result('tamp_data_dws', 'dws_user_watch_live', merge_live_result_dict, file_name) + row = save_result('tamp_data_dws', 'dws_user_watch_live', merge_live_result_dict, file_name) + now_time = now_str(YMDHMS_FORMAT) + save_etl_log('tamp_data_dws', 'dws_user_watch_live', data_dt, row, 'done', task_file, now_time) + # åŠ ä¾èµ–关系,也需è¦è‡ªåЍé…ç½® # åˆ†äº«ç›´æ’ @@ -110,12 +117,16 @@ def merge_live_dict(watch_live_dict, share_live_dict): logging.info(f'{function_name} success') return merge_live_result +# +# def save_etl_log(row, data_dt): +# sql = '''insert into tamp_data_dwd.dwd_etl_log (data_dt, ) +# ''' if __name__ == '__main__': import datetime - begin = datetime.date(2021, 4, 1) - end = datetime.date(2021, 9, 14) + begin = datetime.date(2021, 9, 21) + end = datetime.date(2021, 9, 22) data_dt = begin delta = datetime.timedelta(days=1) while data_dt <= end: diff --git a/edw/dws/user/dws_user_watch_short_video.py b/edw/dws/user/dws_user_watch_short_video.py index 42680622d19dd0ac0d49b816a9e20882073dab3b..97b189a4b5b02c108811e1e7eb722da649ad6412 100644 --- a/edw/dws/user/dws_user_watch_short_video.py +++ b/edw/dws/user/dws_user_watch_short_video.py @@ -1,8 +1,10 @@ # -*- coding: utf-8 -*- import logging +import os import sys -from common.mysql_uitl import fetch_all, save_result, insert_batch, insert +from common.mysql_uitl import fetch_all, save_result, insert_batch, insert, save_etl_log +from common.time_util import YMDHMS_FORMAT, now_str ''' 用户观看çŸè§†é¢‘明细数æ®ç»Ÿè®¡,定时任务,æ¯2ä¸ªå°æ—¶è¿è¡Œä¸€æ¬¡ @@ -10,13 +12,16 @@ from common.mysql_uitl import fetch_all, save_result, insert_batch, insert ''' logging.basicConfig(format="%(asctime)s %(name)s:%(levelname)s:%(message)s", datefmt="%Y-%m-%d %H:%M:%S", level=logging.INFO) file_name = sys.argv[0] +task_file = os.path.split(__file__)[-1].split(".")[0] def dws_user_watch_short_video(data_dt): share_short_video_dict = query_dws_user_share_short_video_num(data_dt) watch_short_video_dict = query_dws_user_watch_short_video(data_dt) merge_short_video_result_dict = merge_short_video_dict(watch_short_video_dict, share_short_video_dict) - save_result('tamp_data_dws', 
'dws_user_watch_short_video', merge_short_video_result_dict, file_name) + row = save_result('tamp_data_dws', 'dws_user_watch_short_video', merge_short_video_result_dict, file_name) + now_time = now_str(YMDHMS_FORMAT) + save_etl_log('tamp_data_dws', 'dws_user_watch_short_video', data_dt, row, 'done', task_file, now_time) # 分享çŸè§†é¢‘ diff --git a/edw/tasks/scheduled_tasks.py b/edw/tasks/scheduled_tasks.py index 5ef9b8c5a5664cd854539ac19fa07d958e5361a0..109bec5aa26ed4f63ad58c700a3c13f30d58a334 100644 --- a/edw/tasks/scheduled_tasks.py +++ b/edw/tasks/scheduled_tasks.py @@ -2,8 +2,6 @@ import logging import sys -from edw.ads.user.ads_user_content_order_records import ads_user_content_order_records -from edw.dwd.user.dwd_user_content_order import dwd_user_content_order from edw.dws.user.dws_user_content_order import dws_user_content_order ''' @@ -14,6 +12,20 @@ from edw.dws.user.dws_user_content_order import dws_user_content_order from apscheduler.schedulers.blocking import BlockingScheduler from common.time_util import get_run_time + +from edw.ads.basic.ads_app_install import ads_app_install +from edw.ads.basic.ads_platform_active import ads_platform_active +from edw.ads.basic.ads_user_level_active import ads_user_level_active +from edw.ads.user.ads_user_content_order_records import ads_user_content_order_records +from edw.ads.user.ads_user_watch_short_video import ads_user_watch_short_video +from edw.dwd.basic.dwd_app_install import dwd_app_install +from edw.dwd.user.dwd_user_content_order import dwd_user_content_order +from edw.dwd.user.dwd_user_first_login_client_time import dwd_user_first_login_client_time +from edw.dwd.user.dwd_user_login_pc_record import dwd_user_login_pc_record +from edw.dwd.user.dwd_user_studio_add_content import dwd_user_studio_add_content +from edw.dwd.user.dwd_user_watch_short_video import dwd_user_watch_short_video +from edw.dws.user.dws_user_first_login_client_time import dws_user_first_login_client_time +from edw.dws.user.dws_user_watch_short_video import dws_user_watch_short_video from edw.ads.user.ads_user_basic_behavior import ads_user_basic_behavior from edw.ads.user.ads_user_browse_fund import ads_user_browse_fund from edw.ads.user.ads_user_learn_course import ads_user_learn_course @@ -32,9 +44,9 @@ from edw.dws.user.dws_user_learn_total_dur import dws_user_learn_total_dur from edw.dws.user.dws_user_login_top_area import dws_user_login_top_area from edw.dws.user.dws_user_login_environment import dws_user_login_environment from edw.dws.user.dws_user_login_phone_mode import dws_user_login_phone_mode -from edw.dws.user.dws_user_visitor_clues import dws_user_visitor_clues from edw.dws.user.dws_user_watch_live import dws_user_watch_live from edw.ods.user.ods_users_info import ods_users_info +from edw.dwd.user.dwd_user_community_res import dwd_user_community_res logging.basicConfig(format="%(asctime)s %(name)s:%(levelname)s:%(message)s", datefmt="%Y-%m-%d %H:%M:%S", level=logging.INFO) file_name = sys.argv[0] @@ -49,30 +61,42 @@ data_dt = run_hour_time[0: 10] def dwd_task_minute(): + ''' + 按分钟定时调度任务 + ''' function_name = sys._getframe().f_code.co_name logging.info(f'{function_name} start') # å†…å®¹è®¢å•æ•°æ® - dwd_user_content_order() + dwd_user_content_order(data_dt) logging.info(f'{function_name} end') def dws_task_minute(): + ''' + 按分钟定时调度任务 + ''' function_name = sys._getframe().f_code.co_name logging.info(f'{function_name} start') # å†…å®¹è®¢å•æ•°æ® - dws_user_content_order() + dws_user_content_order(data_dt) logging.info(f'{function_name} end') def 
@@ -49,30 +61,42 @@ data_dt = run_hour_time[0: 10]
 
 
 def dwd_task_minute():
+    '''
+    Minute-level scheduled task
+    '''
     function_name = sys._getframe().f_code.co_name
     logging.info(f'{function_name} start')
     # Content order data
-    dwd_user_content_order()
+    dwd_user_content_order(data_dt)
     logging.info(f'{function_name} end')
 
 
 def dws_task_minute():
+    '''
+    Minute-level scheduled task
+    '''
     function_name = sys._getframe().f_code.co_name
     logging.info(f'{function_name} start')
     # Content order data
-    dws_user_content_order()
+    dws_user_content_order(data_dt)
     logging.info(f'{function_name} end')
 
 
 def ads_task_minute():
+    '''
+    Minute-level scheduled task
+    '''
     function_name = sys._getframe().f_code.co_name
     logging.info(f'{function_name} start')
     # Content order data
-    ads_user_content_order_records()
+    ads_user_content_order_records(data_dt)
     logging.info(f'{function_name} end')
 
 
 def ods_task_hour():
+    '''
+    Hourly scheduled task
+    '''
     function_name = sys._getframe().f_code.co_name
     logging.info(f'{function_name} start')
     # User info
@@ -80,41 +104,93 @@ def ods_task_hour():
     logging.info(f'{function_name} end')
 
 
+
 def dwd_task_hour():
+    '''
+    Hourly scheduled task
+    '''
     function_name = sys._getframe().f_code.co_name
     logging.info(f'{function_name} start')
-    dwd_user_watch_live(data_dt)
     dwd_user_browse_fund(data_dt)
-    dwd_user_learn_course(data_dt)
+    dwd_user_community_res(data_dt)
     dwd_user_login_area(data_dt)
+    dwd_user_learn_course(data_dt)
     dwd_user_login_environment(data_dt)
     dwd_user_login_phone_mode(data_dt)
     dwd_user_share_event(data_dt)
+    dwd_user_studio_add_content(data_dt)
     dwd_user_visitor_clues(data_dt)
+    dwd_user_watch_live(data_dt)
+    dwd_user_watch_short_video(data_dt)
     logging.info(f'{function_name} end')
 
 
 def dws_task_hour():
+    '''
+    Hourly scheduled task
+    '''
     function_name = sys._getframe().f_code.co_name
     logging.info(f'{function_name} start')
-    dws_user_browse_fund(data_dt)
-    dws_user_learn_course(data_dt)
-    dws_user_learn_total_dur()
     dws_user_login_top_area(data_dt)
-    dws_user_login_environment()
-    dws_user_login_phone_mode()
-    dws_user_visitor_clues(data_dt)
+    dws_user_login_phone_mode(data_dt)
+    dws_user_learn_total_dur(data_dt)
+    dws_user_first_login_client_time(data_dt)
+    dws_user_login_environment(data_dt)
+    dws_user_learn_course(data_dt)
+    dws_user_browse_fund(data_dt)
     dws_user_watch_live(data_dt)
+    dws_user_watch_short_video(data_dt)
     logging.info(f'{function_name} end')
 
 
 def ads_task_hour():
+    '''
+    Hourly scheduled task
+    '''
     function_name = sys._getframe().f_code.co_name
     logging.info(f'{function_name} start')
-    ads_user_basic_behavior()
+    ads_user_basic_behavior(data_dt)
     ads_user_browse_fund(data_dt)
     ads_user_learn_course(data_dt)
     ads_user_watch_live(data_dt)
+    ads_user_watch_short_video(data_dt)
+    ads_user_content_order_records(data_dt)
+    logging.info(f'{function_name} end')
+
+
+
+def dwd_task_day():
+    '''
+    Daily scheduled task
+    '''
+    function_name = sys._getframe().f_code.co_name
+    logging.info(f'{function_name} start')
+    dwd_app_install(data_dt)
+    dwd_user_login_pc_record(data_dt)
+    dwd_user_first_login_client_time(data_dt)
+
+    logging.info(f'{function_name} end')
+
+
+def dws_task_day():
+    '''
+    Daily scheduled task
+    '''
+    function_name = sys._getframe().f_code.co_name
+    logging.info(f'{function_name} start')
+
+    logging.info(f'{function_name} end')
+
+
+def ads_task_day():
+    '''
+    Daily scheduled task
+    '''
+    function_name = sys._getframe().f_code.co_name
+    logging.info(f'{function_name} start')
+    ads_user_level_active(data_dt)
+    ads_platform_active(data_dt)
+    ads_app_install(data_dt)
     logging.info(f'{function_name} end')
 
 
@@ -128,10 +204,12 @@ if __name__ == '__main__':
 
     # Two-hour interval scheduling
     # scheduler.add_job(ods_task_hour, "interval", hours=2)
-    scheduler.add_job(dwd_task_hour, "interval", hours=2)
-    scheduler.add_job(dws_task_hour, "interval", hours=2)
-    scheduler.add_job(ads_task_hour, "interval", hours=2)
-    # Daily scheduling
-    # scheduler.add_job(scheduler_day_job, "interval", minutes=20)
-
+    scheduler.add_job(dwd_task_hour, "interval", minutes=30)
+    scheduler.add_job(dws_task_hour, "interval", minutes=30)
+    scheduler.add_job(ads_task_hour, "interval", minutes=30)
+
+    # Daily scheduled jobs
+    scheduler.add_job(dwd_task_day, "interval", hours=1)
+    scheduler.add_job(dws_task_day, "interval", hours=1)
+    scheduler.add_job(ads_task_day, "interval", hours=1)
     scheduler.start()
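
With the hourly chains above now triggered every 30 minutes, a chain that runs longer than half an hour would collide with its next trigger. APScheduler's add_job accepts max_instances, coalesce and misfire_grace_time for this situation; below is a standalone sketch under that assumption, with slow_hourly_chain standing in for the real task chains.

# -*- coding: utf-8 -*-
# Standalone sketch (not part of this change): an interval job that cannot
# overlap itself even when one run takes longer than the trigger period.
import logging
import time

from apscheduler.schedulers.blocking import BlockingScheduler

logging.basicConfig(level=logging.INFO)


def slow_hourly_chain():
    # Placeholder standing in for dwd_task_hour / dws_task_hour / ads_task_hour.
    logging.info('chain start')
    time.sleep(5)
    logging.info('chain end')


if __name__ == '__main__':
    scheduler = BlockingScheduler()
    # max_instances=1 (the default) refuses a second concurrent run of the job;
    # coalesce=True collapses missed triggers into a single catch-up run.
    scheduler.add_job(slow_hourly_chain, "interval", minutes=30,
                      max_instances=1, coalesce=True, misfire_grace_time=300)
    scheduler.start()
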
diff --git a/edw/tasks/scheduled_tasks_bak.py b/edw/tasks/scheduled_tasks_bak.py
new file mode 100644
index 0000000000000000000000000000000000000000..4771c2d32986a5303a9f4abdb282da9af7d8a610
--- /dev/null
+++ b/edw/tasks/scheduled_tasks_bak.py
@@ -0,0 +1,198 @@
+# -*- coding: utf-8 -*-
+import logging
+import sys
+
+from edw.ads.basic.ads_app_install import ads_app_install
+from edw.ads.user.ads_user_content_order_records import ads_user_content_order_records
+from edw.dwd.basic.dwd_app_install import dwd_app_install
+from edw.dwd.user.dwd_user_content_order import dwd_user_content_order
+from edw.dwd.user.dwd_user_login_pc_record import dwd_user_login_pc_record
+from edw.dws.user.dws_user_content_order import dws_user_content_order
+
+'''
+Scheduled tasks, dispatched layer by layer: ods, dwd, dws, ads
+Depending on business needs, scheduling is split into hourly and daily jobs
+'''
+
+from apscheduler.schedulers.blocking import BlockingScheduler
+
+from common.time_util import get_run_time
+from edw.ads.user.ads_user_basic_behavior import ads_user_basic_behavior
+from edw.ads.user.ads_user_browse_fund import ads_user_browse_fund
+from edw.ads.user.ads_user_learn_course import ads_user_learn_course
+from edw.ads.user.ads_user_watch_live import ads_user_watch_live
+from edw.dwd.user.dwd_user_browse_fund import dwd_user_browse_fund
+from edw.dwd.user.dwd_user_learn_course import dwd_user_learn_course
+from edw.dwd.user.dwd_user_login_area import dwd_user_login_area
+from edw.dwd.user.dwd_user_login_environment import dwd_user_login_environment
+from edw.dwd.user.dwd_user_login_phone_mode import dwd_user_login_phone_mode
+from edw.dwd.user.dwd_user_share_event import dwd_user_share_event
+from edw.dwd.user.dwd_user_visitor_clues import dwd_user_visitor_clues
+from edw.dwd.user.dwd_user_watch_live import dwd_user_watch_live
+from edw.dws.user.dws_user_browse_fund import dws_user_browse_fund
+from edw.dws.user.dws_user_learn_course import dws_user_learn_course
+from edw.dws.user.dws_user_learn_total_dur import dws_user_learn_total_dur
+from edw.dws.user.dws_user_login_top_area import dws_user_login_top_area
+from edw.dws.user.dws_user_login_environment import dws_user_login_environment
+from edw.dws.user.dws_user_login_phone_mode import dws_user_login_phone_mode
+from edw.dws.user.dws_user_visitor_clues import dws_user_visitor_clues
+from edw.dws.user.dws_user_watch_live import dws_user_watch_live
+from edw.ods.user.ods_users_info import ods_users_info
+
+logging.basicConfig(format="%(asctime)s %(name)s:%(levelname)s:%(message)s", datefmt="%Y-%m-%d %H:%M:%S", level=logging.INFO)
+file_name = sys.argv[0]
+
+# Minute-level schedule
+run_minute_time = get_run_time(30)  # Lag 30 minutes when taking the time window. The range can be full-volume or computed per day (for per-day runs the window is 0:00-24:00)
+data_minute_dt = run_minute_time[0: 10]
+
+# Lag 2.5 hours when pulling data, because the scheduled task runs every 2 hours
+run_hour_time = get_run_time(60 * 2.5)
+data_dt = run_hour_time[0: 10]
+
+
+def dwd_task_minute():
+    '''
+    Minute-level scheduled task
+    '''
+    function_name = sys._getframe().f_code.co_name
+    logging.info(f'{function_name} start')
+    # Content order data
+    dwd_user_content_order()
+    logging.info(f'{function_name} end')
+
+
+def dws_task_minute():
+    '''
+    Minute-level scheduled task
+    '''
+    function_name = sys._getframe().f_code.co_name
+    logging.info(f'{function_name} start')
+    # Content order data
+    dws_user_content_order()
+    logging.info(f'{function_name} end')
+
+
+def ads_task_minute():
+    '''
+    Minute-level scheduled task
+    '''
+    function_name = sys._getframe().f_code.co_name
+    logging.info(f'{function_name} start')
+    # Content order data
+    ads_user_content_order_records()
+    logging.info(f'{function_name} end')
+
+
+def ods_task_hour():
+    '''
+    Hourly scheduled task
+    '''
+    function_name = sys._getframe().f_code.co_name
+    logging.info(f'{function_name} start')
+    # User info
+    ods_users_info()
+    logging.info(f'{function_name} end')
+
+
+
+def dwd_task_hour():
+    '''
+    Hourly scheduled task
+    '''
+    function_name = sys._getframe().f_code.co_name
+    logging.info(f'{function_name} start')
+    dwd_user_watch_live(data_dt)
+    dwd_user_browse_fund(data_dt)
+    dwd_user_learn_course(data_dt)
+    dwd_user_login_area(data_dt)
+    dwd_user_login_environment(data_dt)
+    dwd_user_login_phone_mode(data_dt)
+    dwd_user_share_event(data_dt)
+    dwd_user_visitor_clues(data_dt)
+    logging.info(f'{function_name} end')
+
+
+def dws_task_hour():
+    '''
+    Hourly scheduled task
+    '''
+    function_name = sys._getframe().f_code.co_name
+    logging.info(f'{function_name} start')
+    dws_user_browse_fund(data_dt)
+    dws_user_learn_course(data_dt)
+    dws_user_learn_total_dur()
+    dws_user_login_top_area(data_dt)
+    dws_user_login_environment()
+    dws_user_login_phone_mode()
+    dws_user_visitor_clues(data_dt)
+    dws_user_watch_live(data_dt)
+    logging.info(f'{function_name} end')
+
+
+def ads_task_hour():
+    '''
+    Hourly scheduled task
+    '''
+    function_name = sys._getframe().f_code.co_name
+    logging.info(f'{function_name} start')
+    ads_user_basic_behavior(data_dt)
+    ads_user_browse_fund(data_dt)
+    ads_user_learn_course(data_dt)
+    ads_user_watch_live(data_dt)
+    logging.info(f'{function_name} end')
+
+
+
+def dwd_task_day():
+    '''
+    Daily scheduled task
+    '''
+    function_name = sys._getframe().f_code.co_name
+    logging.info(f'{function_name} start')
+    dwd_app_install(data_dt)
+    dwd_user_login_pc_record(data_dt)
+    logging.info(f'{function_name} end')
+
+
+def dws_task_day():
+    '''
+    Daily scheduled task
+    '''
+    function_name = sys._getframe().f_code.co_name
+    logging.info(f'{function_name} start')
+
+    logging.info(f'{function_name} end')
+
+
+def ads_task_day():
+    '''
+    Daily scheduled task
+    '''
+    function_name = sys._getframe().f_code.co_name
+    logging.info(f'{function_name} start')
+    ads_app_install(data_dt)
+    logging.info(f'{function_name} end')
+
+
+if __name__ == '__main__':
+    scheduler = BlockingScheduler()
+
+    # Minute-level scheduling
+    scheduler.add_job(dwd_task_minute, "interval", minutes=10)
+    scheduler.add_job(dws_task_minute, "interval", minutes=10)
+    scheduler.add_job(ads_task_minute, "interval", minutes=10)
+
+    # Two-hour interval scheduling
+    # scheduler.add_job(ods_task_hour, "interval", hours=2)
+    scheduler.add_job(dwd_task_hour, "interval", hours=2)
+    scheduler.add_job(dws_task_hour, "interval", hours=2)
+    scheduler.add_job(ads_task_hour, "interval", hours=2)
+    # Daily scheduling
+    # scheduler.add_job(scheduler_day_job, "interval", minutes=20)
+
+    # Daily scheduled jobs
+    scheduler.add_job(dwd_task_day, "interval", hours=3)
+    scheduler.add_job(dws_task_day, "interval", hours=3)
+    scheduler.add_job(ads_task_day, "interval", hours=3)
+    scheduler.start()
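
The dws modules touched earlier in this diff each carry a hand-edited backfill loop in their __main__ block (a begin/end date and one call per day). A small driver that generalises that pattern; backfill is a hypothetical helper, and the loop body mirrors the existing __main__ code.

# -*- coding: utf-8 -*-
# Hypothetical backfill driver (not part of this change): generalises the
# begin/end date loops found in the __main__ blocks of the dws modules.
import datetime


def backfill(task, begin, end):
    # Call `task` once per day from `begin` to `end` inclusive, mirroring the
    # while-loop used in dws_user_watch_live / dws_user_visitor_clues.
    data_dt = begin
    delta = datetime.timedelta(days=1)
    while data_dt <= end:
        print(data_dt.strftime("%Y-%m-%d"))
        task(data_dt)
        data_dt += delta


if __name__ == '__main__':
    from edw.dws.user.dws_user_watch_live import dws_user_watch_live

    backfill(dws_user_watch_live, datetime.date(2021, 9, 21), datetime.date(2021, 9, 22))
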