解析ClickHouse中的json字符串数据
假如表中有字段actions (string类型) 格式如下:
[
{
"1d_click": "251",
"1d_view": "1293",
"28d_click": "251",
"28d_view": "1293",
"7d_click": "251",
"7d_view": "1293",
"action_type": "post_engagement",
"value": "1544"
},
{
"1d_click": "1",
"28d_click": "1",
"7d_click": "1",
"action_type": "add_to_cart",
"value": "1"
},
{
"1d_click": "207",
"28d_click": "207",
"7d_click": "207",
"action_type": "link_click",
"value": "207"
},
{
"1d_click": "1",
"28d_click": "1",
"7d_click": "1",
"action_type": "omni_purchase",
"value": "1"
}
]
假如现在需要创建新的视图,比如取action_type为add_to_cart中的value 整体视图如下:
CREATE VIEW
yinolink_data.etl_vw_fb_account_daily_spent (`account_id` String, `spent` Float64, `clicks` Float64, `impressions` Float64, `reach` Float64, `7d_click_add_to_cart` Float64, `1d_click_add_to_cart` Float64, `1d_view_add_to_cart` Float64, `7d_click_checkout` Float64, `1d_click_checkout` Float64, `1d_view_checkout` Float64, `7d_click_purchase` Float64, `1d_click_purchase` Float64, `1d_view_purchase` Float64, `7d_click_purchase_value` Float64, `1d_click_purchase_value` Float64, `1d_view_purchase_value` Float64, `7d_click_install` Float64, `1d_click_install` Float64, `1d_view_install` Float64, `account_currency` String, `spent_time` DateTime, `created_time` DateTime) AS
Select
toString(account_id) as account_id,
spend as spent,
clicks as clicks,
impressions as impressions,
reach as reach,
account_currency as account_currency,
up_at as created_time,
JSONExtract(actions, 'Array(Tuple(1d_click Float, 1d_view Float, 7d_click Float, action_type String, value String))') AS action_format,
arrayFilter(i -> tupleElement(i, 'action_type') = 'add_to_cart', action_format) AS add_to_cart,
tupleElement(add_to_cart[1], '7d_click') as `7d_click_add_to_cart`,
tupleElement(add_to_cart[1], '1d_click') as `1d_click_add_to_cart`,
tupleElement(add_to_cart[1], '1d_view') as `1d_view_add_to_cart`,
arrayFilter(i -> tupleElement(i, 'action_type') = 'purchase', action_format) AS purchase,
tupleElement(purchase[1], '7d_click') as `7d_click_purchase`,
tupleElement(purchase[1], '1d_click') as `1d_click_purchase`,
tupleElement(purchase[1], '1d_view') as `1d_view_purchase`,
arrayFilter(i -> tupleElement(i, 'action_type') = 'omni_initiated_checkout', action_format) AS checkout1,
tupleElement(checkout1[1], '7d_click') as `7d_click_checkout1`,
tupleElement(checkout1[1], '1d_click') as `1d_click_checkout1`,
tupleElement(checkout1[1], '1d_view') as `1d_view_checkout1`,
arrayFilter(i -> tupleElement(i, 'action_type') = 'initiated_checkout', action_format) AS checkout2,
tupleElement(checkout2[1], '7d_click') as `7d_click_checkout2`,
tupleElement(checkout2[1], '1d_click') as `1d_click_checkout2`,
tupleElement(checkout2[1], '1d_view') as `1d_view_checkout2`,
COALESCE(
CASE WHEN `7d_click_checkout2` != 0 THEN `7d_click_checkout2` ELSE
CASE WHEN `7d_click_checkout1` != 0 THEN `7d_click_checkout1` ELSE 0.0 END
END, 0.0
) as `7d_click_checkout`,
COALESCE(
CASE WHEN `1d_click_checkout2` != 0 THEN `1d_click_checkout2` ELSE
CASE WHEN `1d_click_checkout1` != 0 THEN `1d_click_checkout1` ELSE 0.0 END
END, 0.0) as `1d_click_checkout`,
COALESCE(
CASE WHEN `1d_view_checkout2` != 0 THEN `1d_view_checkout2` ELSE
CASE WHEN `1d_view_checkout1` != 0 THEN `1d_view_checkout1` ELSE 0.0 END
END, 0.0) as `1d_view_checkout`,
arrayFilter(i -> tupleElement(i, 'action_type') = 'omni_app_install', action_format) AS app_install1,
tupleElement(app_install1[1], '7d_click') as `7d_click_install1`,
tupleElement(app_install1[1], '1d_click') as `1d_click_install1`,
tupleElement(app_install1[1], '1d_view') as `1d_view_install1`,
arrayFilter(i -> tupleElement(i, 'action_type') = 'app_install', action_format) AS app_install2,
tupleElement(app_install2[1], '7d_click') as `7d_click_install2`,
tupleElement(app_install2[1], '1d_click') as `1d_click_install2`,
tupleElement(app_install2[1], '1d_view') as `1d_view_install2`,
COALESCE(
CASE WHEN `7d_click_install2` != 0 THEN `7d_click_install2` ELSE
CASE WHEN `7d_click_install1` != 0 THEN `7d_click_install1` ELSE 0.0 END
END, 0.0) as `7d_click_install`,
COALESCE(
CASE WHEN `1d_click_install2` != 0 THEN `1d_click_install2` ELSE
CASE WHEN `1d_click_install1` != 0 THEN `1d_click_install1` ELSE 0.0 END
END, 0.0) as `1d_click_install`,
COALESCE(
CASE WHEN `1d_view_install2` != 0 THEN `1d_view_install2` ELSE
CASE WHEN `1d_view_install1` != 0 THEN `1d_view_install1` ELSE 0.0 END
END, 0.0) as `1d_view_install`,
JSONExtract(action_values, 'Array(Tuple(1d_click Float, 1d_view Float, 7d_click Float, action_type String, value String))') AS action_values_format,
arrayFilter(i -> tupleElement(i, 'action_type') = 'purchase', action_values_format) AS value_purchase,
tupleElement(value_purchase[1], '7d_click') as `7d_click_purchase_value`,
tupleElement(value_purchase[1], '1d_click') as `1d_click_purchase_value`,
tupleElement(value_purchase[1], '1d_view') as `1d_view_purchase_value`,
toString(date_id) as date_id_str,
toDateTime(concat(substring(date_id_str,1,4), '-', substring(date_id_str,5,2),'-',substring(date_id_str,7,2), ' 00:00:00') ) as spent_time
FROM
(
SELECT
*,
row_number() OVER (
PARTITION BY
account_id,
date_id
) AS rn
FROM
yinolink_data.etl_fb_accounts_spend
) AS tmp
WHERE
rn = 1
可参考解析json:
SELECT
JSONExtract(actions, 'Array(Tuple(1d_click Float, 1d_view Float, 7d_click Float, action_type String, value String))') AS x,
arrayFilter(i -> tupleElement(i, 'action_type') = 'purchase', x) AS z,
tupleElement(z[1], '1d_click') as value_1d_click
FROM etl_fb_accounts_spend
解析json中的数字字符串:
CREATE VIEW
yinolink_data.etl_vw_fb_account_daily_spent (`account_id` String, `spent` Float64, `clicks` Float64, `impressions` Float64, `reach` Float64, `7d_click_add_to_cart` Float64, `1d_click_add_to_cart` Float64, `1d_view_add_to_cart` Float64, `7d_click_checkout` Float64, `1d_click_checkout` Float64, `1d_view_checkout` Float64, `7d_click_purchase` Float64, `1d_click_purchase` Float64, `1d_view_purchase` Float64, `7d_click_purchase_value` Float64, `1d_click_purchase_value` Float64, `1d_view_purchase_value` Float64, `7d_click_install` Float64, `1d_click_install` Float64, `1d_view_install` Float64, `account_currency` String, `spent_time` DateTime, `created_time` DateTime) AS
Select
toString(account_id) AS account_id,
spend AS spent,
clicks AS clicks,
impressions AS impressions,
reach AS reach,
account_currency AS account_currency,
up_at AS created_time,
JSONExtract(actions, 'Array(Tuple(1d_click String, 1d_view String, 7d_click String, action_type String, value String))') AS action_format,
arrayFilter(i -> tupleElement(i, 'action_type') = 'add_to_cart', action_format) AS add_to_cart,
COALESCE(toFloat64OrNull(tupleElement(add_to_cart[1], '7d_click')), 0) AS `7d_click_add_to_cart`,
COALESCE(toFloat64OrNull(tupleElement(add_to_cart[1], '1d_click')), 0) AS `1d_click_add_to_cart`,
COALESCE(toFloat64OrNull(tupleElement(add_to_cart[1], '1d_view')), 0) AS `1d_view_add_to_cart`,
arrayFilter(i -> tupleElement(i, 'action_type') = 'purchase', action_format) AS purchase,
COALESCE(toFloat64OrNull(tupleElement(purchase[1], '7d_click')), 0) AS `7d_click_purchase`,
COALESCE(toFloat64OrNull(tupleElement(purchase[1], '1d_click')), 0) AS `1d_click_purchase`,
COALESCE(toFloat64OrNull(tupleElement(purchase[1], '1d_view')), 0) AS `1d_view_purchase`,
arrayFilter(i -> tupleElement(i, 'action_type') = 'omni_initiated_checkout', action_format) AS checkout1,
COALESCE(toFloat64OrNull(tupleElement(checkout1[1], '7d_click')), 0) as `7d_click_checkout1`,
COALESCE(toFloat64OrNull(tupleElement(checkout1[1], '1d_click')), 0) as `1d_click_checkout1`,
COALESCE(toFloat64OrNull(tupleElement(checkout1[1], '1d_view')), 0) as `1d_view_checkout1`,
arrayFilter(i -> tupleElement(i, 'action_type') = 'initiated_checkout', action_format) AS checkout2,
COALESCE(toFloat64OrNull(tupleElement(checkout2[1], '7d_click')), 0) as `7d_click_checkout2`,
COALESCE(toFloat64OrNull(tupleElement(checkout2[1], '1d_click')), 0) as `1d_click_checkout2`,
COALESCE(toFloat64OrNull(tupleElement(checkout2[1], '1d_view')), 0) as `1d_view_checkout2`,
COALESCE(
CASE WHEN `7d_click_checkout2` != 0 THEN `7d_click_checkout2` ELSE
CASE WHEN `7d_click_checkout1` != 0 THEN `7d_click_checkout1` ELSE 0.0 END
END, 0.0
) as `7d_click_checkout`,
COALESCE(
CASE WHEN `1d_click_checkout2` != 0 THEN `1d_click_checkout2` ELSE
CASE WHEN `1d_click_checkout1` != 0 THEN `1d_click_checkout1` ELSE 0.0 END
END, 0.0) as `1d_click_checkout`,
COALESCE(
CASE WHEN `1d_view_checkout2` != 0 THEN `1d_view_checkout2` ELSE
CASE WHEN `1d_view_checkout1` != 0 THEN `1d_view_checkout1` ELSE 0.0 END
END, 0.0) as `1d_view_checkout`,
arrayFilter(i -> tupleElement(i, 'action_type') = 'omni_app_install', action_format) AS app_install1,
COALESCE(toFloat64OrNull(tupleElement(app_install1[1], '7d_click')), 0) as `7d_click_install1`,
COALESCE(toFloat64OrNull(tupleElement(app_install1[1], '1d_click')), 0) as `1d_click_install1`,
COALESCE(toFloat64OrNull(tupleElement(app_install1[1], '1d_view')), 0) as `1d_view_install1`,
arrayFilter(i -> tupleElement(i, 'action_type') = 'app_install', action_format) AS app_install2,
COALESCE(toFloat64OrNull(tupleElement(app_install2[1], '7d_click')), 0) as `7d_click_install2`,
COALESCE(toFloat64OrNull(tupleElement(app_install2[1], '1d_click')), 0) as `1d_click_install2`,
COALESCE(toFloat64OrNull(tupleElement(app_install2[1], '1d_view')), 0) as `1d_view_install2`,
COALESCE(
CASE WHEN `7d_click_install2` != 0 THEN `7d_click_install2` ELSE
CASE WHEN `7d_click_install1` != 0 THEN `7d_click_install1` ELSE 0.0 END
END, 0.0) as `7d_click_install`,
COALESCE(
CASE WHEN `1d_click_install2` != 0 THEN `1d_click_install2` ELSE
CASE WHEN `1d_click_install1` != 0 THEN `1d_click_install1` ELSE 0.0 END
END, 0.0) as `1d_click_install`,
COALESCE(
CASE WHEN `1d_view_install2` != 0 THEN `1d_view_install2` ELSE
CASE WHEN `1d_view_install1` != 0 THEN `1d_view_install1` ELSE 0.0 END
END, 0.0) as `1d_view_install`,
JSONExtract(action_values, 'Array(Tuple(1d_click String, 1d_view String, 7d_click String, action_type String, value String))') AS action_values_format,
arrayFilter(i -> tupleElement(i, 'action_type') = 'purchase', action_values_format) AS value_purchase,
COALESCE(toFloat64OrNull(tupleElement(value_purchase[1], '7d_click')), 0) as `7d_click_purchase_value`,
COALESCE(toFloat64OrNull(tupleElement(value_purchase[1], '1d_click')), 0) as `1d_click_purchase_value`,
COALESCE(toFloat64OrNull(tupleElement(value_purchase[1], '1d_view')), 0) as `1d_view_purchase_value`,
toString(date_id) AS date_id_str,
toDateTime(concat(substring(date_id_str, 1, 4), '-', substring(date_id_str, 5, 2), '-', substring(date_id_str, 7, 2), ' 00:00:00')) AS spent_time
FROM
(
SELECT
*,
row_number() OVER (
PARTITION BY
account_id,
date_id
) AS rn
FROM
yinolink_data.etl_fb_accounts_spend
) AS tmp
WHERE
rn = 1
参考:https://github.com/ClickHouse/ClickHouse/issues/46851