Column-Level SQL Lineage Demo
WITH net_aum_df AS( SELECT area, uid, create_time, deposit_date, deposit_type, SUM(real_amount) AS real_amount FROM ( SELECT area, uid, create_time, deposit_date, deposit_type, SUM(real_amount) real_amount FROM in_log_df WHERE deposit_date BETWEEN concat( '2022-', if((FLOOR('12' / 3.1) * 3) + 1 < 10, '0', ''), (FLOOR('12' / 3.1) * 3) + 1, '-01' ) AND '2022-12-30' GROUP BY area, uid, create_time, deposit_date, deposit_type UNION ALL SELECT area, uid, create_time, deposit_date, deposit_type, SUM(real_amount) real_amount FROM out_log_df WHERE deposit_date BETWEEN concat( '2022-', if((FLOOR('12' / 3.1) * 3) + 1 < 10, '0', ''), (FLOOR('12' / 3.1) * 3) + 1, '-01' ) AND '2022-12-30' GROUP BY area, uid, create_time, deposit_date, deposit_type ) a GROUP BY area, uid, create_time, deposit_date, deposit_type ), user_type AS( SELECT a.uid, a.area, a.`quarter`, a.first_cash_stock_in_time, a.first_esop_time, a.first_cash_stock_in_date, a.first_esop_date, if( a.first_cash_stock_in_date >= concat( '2022-', if((FLOOR('12' / 3.1) * 3) + 1 < 10, '0', ''), (FLOOR('12' / 3.1) * 3) + 1, '-01' ), 'new', b.user_type ) AS user_type FROM ( SELECT user_id AS uid, 'HK' AS area, CASE WHEN '2022-12-30' BETWEEN '2023-04-01' AND '2023-06-30' THEN 'Q2' WHEN '2022-12-30' BETWEEN '2023-07-01' AND '2023-09-30' THEN 'Q3' WHEN '2022-12-30' BETWEEN '2023-10-01' AND '2023-12-31' THEN 'Q4' END `quarter`, first_cash_stock_in_time, first_esop_time, from_unixtime(first_cash_stock_in_time, 'yyyy-MM-dd') AS first_cash_stock_in_date, from_unixtime(first_esop_time, 'yyyy-MM-dd') AS first_esop_date FROM net_success_time WHERE from_unixtime(first_cash_stock_in_time, 'yyyy-MM-dd') <= '2022-12-30' ) a LEFT JOIN lossing_user b ON a.uid = b.uid AND a.`quarter` = b.`quarter` AND a.area = b.area ), trans_pc_aum AS ( SELECT 'HK' AS area, MONTH(a.date_key) AS MONTH, COUNT(DISTINCT a.uid) AS m_new_trans_pc, SUM(real_amount) AS m_new_follow_aum, SUM(trans_amt) AS m_first_trans_aum , SUM(flag_stay) AS total_pc_unloss_k FROM ( SELECT DISTINCT uid, date_key, area, if(uid_leave IS NOT NULL, 1, 0) AS flag_stay, nvl(expand_cash_amt, 0) + nvl(expand_stock_amt, 0) + nvl(transpc_first_stock_amt, 0) + nvl(transpc_first_cash_amt, 0) AS trans_amt FROM expand_pc_aum_df WHERE date_key BETWEEN concat( '2022-', if((FLOOR('12' / 3.1) * 3) + 1 < 10, '0', ''), (FLOOR('12' / 3.1) * 3) + 1, '-01' ) AND '2022-12-30' AND uid_status <> 'delete' ) a LEFT JOIN ( SELECT uid, area, SUM(real_amount) real_amount FROM inter_net_aum_df WHERE dt = '2022-12-30' GROUP BY uid, area ) b ON a.uid = b.uid AND a.area = b.area GROUP BY MONTH(a.date_key) ), recall_pc_aum AS( SELECT 'HK' AS area, MONTH(reflow_date) AS MONTH, COUNT(DISTINCT a.uid) AS m_recall_pc, SUM(real_amount) AS m_recall_follow_aum FROM ( SELECT DISTINCT uid, area, reflow_date FROM recall_pc_aum_df WHERE datediff(reflow_create_date, effective_follow_date) BETWEEN 0 AND 14 AND reflow_date BETWEEN concat( '2022-', if((FLOOR('12' / 3.1) * 3) + 1 < 10, '0', ''), (FLOOR('12' / 3.1) * 3) + 1, '-01' ) AND '2022-12-30' ) a LEFT JOIN ( SELECT uid, area, SUM(real_amount) real_amount FROM inter_net_aum_df WHERE dt = '2022-12-30' GROUP BY uid, area ) b ON a.uid = b.uid AND a.area = b.area GROUP BY MONTH(reflow_date) ), stay_pc AS( SELECT nvl(a.area, 0) AS area, MONTH(update_time) AS MONTH, SUM(m_newloss_pc) m_newloss_pc, SUM(m_intercept_pc) m_intercept_pc FROM ( SELECT area, substr(assets_complete_date, 1, 10) AS update_time, COUNT(DISTINCT uid) AS m_newloss_pc, 0 AS m_intercept_pc FROM save_uid_df WHERE substr(assets_complete_date, 1, 10) BETWEEN concat( '2022-', if((FLOOR('12' / 3.1) * 3) + 1 < 10, '0', ''), (FLOOR('12' / 3.1) * 3) + 1, '-01' ) AND '2022-12-30' GROUP BY area, substr(assets_complete_date, 1, 10) UNION ALL SELECT area, date_key AS update_time, 0 AS m_newloss_pc, COUNT(DISTINCT uid) AS m_intercept_pc FROM ( SELECT area AS area, MAX(date_key) AS date_key, uid AS uid FROM intercept_uid_df WHERE flag_success = 1 AND date_key BETWEEN concat( '2022-', if((FLOOR('12' / 3.1) * 3) + 1 < 10, '0', ''), (FLOOR('12' / 3.1) * 3) + 1, '-01' ) AND '2022-12-30' GROUP BY area, uid ) a GROUP BY area, MONTH(update_time) ) a GROUP BY area, update_time ), again_deposit AS( SELECT a.area, a.uid, d.user_type, assets_init_time, assets_complete_time, assets_init_date, assets_complete_date, intervene_time , real_amount FROM ( SELECT area, uid, sid, assets_init_time, assets_complete_time, assets_init_date, assets_complete_date, intervene_time, real_amount FROM assets_in_uid_df WHERE performance_type NOT IN (0) AND flag_performance_aum = 1 UNION ALL SELECT area, uid, sid, assets_init_time, assets_complete_time, substr(assets_init_date, 1, 10) AS assets_init_date, substr(assets_complete_date, 1, 10) AS assets_complete_date, intervene_time, real_amount FROM save_uid_df WHERE flag_performance_aum = 1 ) a LEFT JOIN ( SELECT uid, area FROM recall_pc_aum_df WHERE datediff(reflow_create_date, effective_follow_date) BETWEEN 0 AND 14 AND reflow_date BETWEEN concat( '2022-', if((FLOOR('12' / 3.1) * 3) + 1 < 10, '0', ''), (FLOOR('12' / 3.1) * 3) + 1, '-01' ) AND '2022-12-30' GROUP BY uid, area ) b ON a.uid = b.uid AND a.area = b.area LEFT JOIN ( SELECT uid, area FROM expand_pc_aum_df WHERE date_key BETWEEN concat( '2022-', if((FLOOR('12' / 3.1) * 3) + 1 < 10, '0', ''), (FLOOR('12' / 3.1) * 3) + 1, '-01' ) AND '2022-12-30' GROUP BY uid, area ) c ON a.uid = c.uid AND a.area = c.area LEFT JOIN user_type d ON a.uid = d.uid WHERE b.uid IS NULL AND c.uid IS NULL AND real_amount > 0 AND assets_complete_date BETWEEN concat( '2022-', if((FLOOR('12' / 3.1) * 3) + 1 < 10, '0', ''), (FLOOR('12' / 3.1) * 3) + 1, '-01' ) AND '2022-12-30' ), again_deposit_net_aum AS( SELECT a.area, a.uid, user_type, intervene_time, intervene_effect_time, in_amount, assets_complete_time, if( ABS(SUM(b.real_amount)) > ABS(in_amount), 0, in_amount + SUM(nvl(b.real_amount, 0)) ) AS net_aum FROM ( SELECT area, uid, user_type, intervene_time, intervene_effect_time, SUM(real_amount) AS in_amount, MAX(if(rn = 1, assets_complete_time, NULL)) AS assets_complete_time FROM ( SELECT area, uid, user_type, assets_init_time, assets_complete_time, assets_init_date, assets_complete_date, real_amount, intervene_time, intervene_time + 14 * 3600 * 24 AS intervene_effect_time, ROW_NUMBER() OVER( PARTITION BY area, uid, intervene_time ORDER BY assets_complete_time ) AS rn FROM again_deposit a ) t GROUP BY area, uid, user_type, intervene_time, intervene_effect_time ) a LEFT JOIN ( SELECT * FROM inter_net_aum_df WHERE deposit_type LIKE '%out' AND dt = '2022-12-30' ) b ON a.uid = b.uid AND b.create_time BETWEEN a.assets_complete_time AND a.intervene_effect_time GROUP BY a.area, a.uid, user_type, intervene_time, intervene_effect_time, in_amount, assets_complete_time ), total_hk_kpi AS( SELECT MONTH(date_key_t) AS MONTH, 'HK' AS area, SUM(total_pc) AS total_pc , SUM(total_recall) AS total_recall , SUM(total_loss) AS total_loss , SUM(net_total_pc) AS net_total_pc , SUM(m_total_aum) AS m_total_aum FROM ( SELECT concat( SUBSTRING(date_key, 1, 4), '-', SUBSTRING(date_key, 5, 2), '-', SUBSTRING(date_key, 7, 2) ) AS date_key_t, new_asset AS total_pc , asset_user - new_asset - keep_asset AS total_recall , miss_asset AS total_loss , asset_user - last_asset AS net_total_pc , money_in_amt + stock_in_amt + esop_amt - money_out_amt - stock_out_amt AS m_total_aum FROM statis_daily_new WHERE ( date_key >= 20230101 AND date_key <= 20230630 AND area = 'hk' AND is_tob = 0 ) OR ( date_key >= 20230701 AND is_tob = 0 ) ) a WHERE date_key_t BETWEEN concat( '2022-', if((FLOOR('12' / 3.1) * 3) + 1 < 10, '0', ''), (FLOOR('12' / 3.1) * 3) + 1, '-01' ) AND '2022-12-30' GROUP BY MONTH(date_key_t) ) INSERT overwrite TABLE ratio_month PARTITION(dt) SELECT * FROM ( SELECT '2022' AS YEAR, a.month AS MONTH, a.area , total_PC , total_loss , total_recall , net_total_pc , m_total_aum , nvl(m_new_trans_pc, 0) AS m_new_trans_pc_k , total_pc_unloss_k AS total_pc_unloss_k , nvl(m_recall_pc, 0) AS m_recall_pc_K , nvl(m_new_follow_aum, 0) + nvl(m_new_nature_aum, 0) + nvl(m_stock_aum, 0) + nvl(m_recall_follow_aum, 0) + nvl(m_recall_nature_aum, 0) AS m_total_aum_k , nvl(m_new_follow_aum, 0) AS m_new_follow_aum_k , nvl(m_recall_follow_aum, 0) AS m_recall_follow_aum_k , concat('2022-', lpad(a.month, 2, 0), '-01') AS dt FROM ( SELECT * FROM total_hk_kpi ) a LEFT JOIN ( SELECT 'HK' AS area, MONTH(from_unixtime(assets_complete_time, 'yyyy-MM-dd')) AS MONTH, SUM(if(user_type = 'new', net_aum, 0)) AS m_new_nature_aum, SUM(if(user_type = 'stock_user', net_aum, 0)) AS m_stock_aum, SUM(if(user_type = 'lossing_user', net_aum, 0)) AS m_recall_nature_aum FROM again_deposit_net_aum GROUP BY MONTH(from_unixtime(assets_complete_time, 'yyyy-MM-dd')) ) c ON a.area = c.area AND a.month = c.month LEFT JOIN recall_pc_aum b ON a.area = b.area AND a.month = b.month LEFT JOIN trans_pc_aum e ON a.area = e.area AND a.month = e.month ) a
tpcds
example
Submit