解析ClickHouse中的复杂json字符串数据

757人浏览 2023-07-26

解析ClickHouse中的json字符串数据

假如表中有字段actions (string类型) 格式如下:

[
    {
        "1d_click": "251",
        "1d_view": "1293",
        "28d_click": "251",
        "28d_view": "1293",
        "7d_click": "251",
        "7d_view": "1293",
        "action_type": "post_engagement",
        "value": "1544"
    },
    {
        "1d_click": "1",
        "28d_click": "1",
        "7d_click": "1",
        "action_type": "add_to_cart",
        "value": "1"
    },
    {
        "1d_click": "207",
        "28d_click": "207",
        "7d_click": "207",
        "action_type": "link_click",
        "value": "207"
    },
    {
        "1d_click": "1",
        "28d_click": "1",
        "7d_click": "1",
        "action_type": "omni_purchase",
        "value": "1"
    }
]

假如现在需要创建新的视图,比如取action_type为add_to_cart中的value 整体视图如下:

CREATE VIEW
    yinolink_data.etl_vw_fb_account_daily_spent (`account_id` String, `spent` Float64, `clicks` Float64, `impressions` Float64, `reach` Float64, `7d_click_add_to_cart` Float64, `1d_click_add_to_cart` Float64, `1d_view_add_to_cart` Float64, `7d_click_checkout` Float64, `1d_click_checkout` Float64, `1d_view_checkout` Float64, `7d_click_purchase` Float64, `1d_click_purchase` Float64, `1d_view_purchase` Float64, `7d_click_purchase_value` Float64, `1d_click_purchase_value` Float64, `1d_view_purchase_value` Float64, `7d_click_install` Float64, `1d_click_install` Float64, `1d_view_install` Float64, `account_currency` String, `spent_time` DateTime, `created_time` DateTime) AS
Select
    toString(account_id) as account_id,
    spend as spent,
    clicks as clicks,
    impressions as impressions,
    reach as reach,
    account_currency as account_currency,
    up_at as created_time,
    JSONExtract(actions, 'Array(Tuple(1d_click Float, 1d_view Float, 7d_click Float, action_type String, value String))') AS action_format,

    arrayFilter(i -> tupleElement(i, 'action_type') = 'add_to_cart', action_format) AS add_to_cart,
    tupleElement(add_to_cart[1], '7d_click') as `7d_click_add_to_cart`,
    tupleElement(add_to_cart[1], '1d_click') as `1d_click_add_to_cart`,
    tupleElement(add_to_cart[1], '1d_view') as `1d_view_add_to_cart`,

    arrayFilter(i -> tupleElement(i, 'action_type') = 'purchase', action_format) AS purchase,
    tupleElement(purchase[1], '7d_click') as `7d_click_purchase`,
    tupleElement(purchase[1], '1d_click') as `1d_click_purchase`,
    tupleElement(purchase[1], '1d_view') as `1d_view_purchase`,

    arrayFilter(i -> tupleElement(i, 'action_type') = 'omni_initiated_checkout', action_format) AS checkout1,
    tupleElement(checkout1[1], '7d_click') as `7d_click_checkout1`,
    tupleElement(checkout1[1], '1d_click') as `1d_click_checkout1`,
    tupleElement(checkout1[1], '1d_view') as `1d_view_checkout1`,
    arrayFilter(i -> tupleElement(i, 'action_type') = 'initiated_checkout', action_format) AS checkout2,
    tupleElement(checkout2[1], '7d_click') as `7d_click_checkout2`,
    tupleElement(checkout2[1], '1d_click') as `1d_click_checkout2`,
    tupleElement(checkout2[1], '1d_view') as `1d_view_checkout2`,
    COALESCE(
        CASE WHEN `7d_click_checkout2` != 0 THEN `7d_click_checkout2` ELSE
        CASE WHEN `7d_click_checkout1` != 0 THEN `7d_click_checkout1` ELSE 0.0 END
        END, 0.0
    ) as `7d_click_checkout`,
    COALESCE(
        CASE WHEN `1d_click_checkout2` != 0 THEN `1d_click_checkout2` ELSE
            CASE WHEN `1d_click_checkout1` != 0 THEN `1d_click_checkout1` ELSE 0.0 END
        END, 0.0) as `1d_click_checkout`,
    COALESCE(
        CASE WHEN `1d_view_checkout2` != 0 THEN `1d_view_checkout2` ELSE
            CASE WHEN `1d_view_checkout1` != 0 THEN `1d_view_checkout1` ELSE 0.0 END
        END, 0.0) as `1d_view_checkout`,

    arrayFilter(i -> tupleElement(i, 'action_type') = 'omni_app_install', action_format) AS app_install1,
    tupleElement(app_install1[1], '7d_click') as `7d_click_install1`,
    tupleElement(app_install1[1], '1d_click') as `1d_click_install1`,
    tupleElement(app_install1[1], '1d_view') as `1d_view_install1`,
    arrayFilter(i -> tupleElement(i, 'action_type') = 'app_install', action_format) AS app_install2,
    tupleElement(app_install2[1], '7d_click') as `7d_click_install2`,
    tupleElement(app_install2[1], '1d_click') as `1d_click_install2`,
    tupleElement(app_install2[1], '1d_view') as `1d_view_install2`,
    COALESCE(
        CASE WHEN `7d_click_install2` != 0 THEN `7d_click_install2` ELSE
            CASE WHEN `7d_click_install1` != 0 THEN `7d_click_install1` ELSE 0.0 END
        END, 0.0) as `7d_click_install`,
    COALESCE(
        CASE WHEN `1d_click_install2` != 0 THEN `1d_click_install2` ELSE
            CASE WHEN `1d_click_install1` != 0 THEN `1d_click_install1` ELSE 0.0 END
        END, 0.0) as `1d_click_install`,
    COALESCE(
        CASE WHEN `1d_view_install2` != 0 THEN `1d_view_install2` ELSE
            CASE WHEN `1d_view_install1` != 0 THEN `1d_view_install1` ELSE 0.0 END
        END, 0.0) as `1d_view_install`,

    JSONExtract(action_values, 'Array(Tuple(1d_click Float, 1d_view Float, 7d_click Float, action_type String, value String))') AS action_values_format,
    arrayFilter(i -> tupleElement(i, 'action_type') = 'purchase', action_values_format) AS value_purchase,
    tupleElement(value_purchase[1], '7d_click') as `7d_click_purchase_value`,
    tupleElement(value_purchase[1], '1d_click') as `1d_click_purchase_value`,
    tupleElement(value_purchase[1], '1d_view') as `1d_view_purchase_value`,
    toString(date_id) as date_id_str,
    toDateTime(concat(substring(date_id_str,1,4), '-', substring(date_id_str,5,2),'-',substring(date_id_str,7,2), ' 00:00:00') ) as spent_time
FROM
  (
    SELECT
      *,
      row_number() OVER (
        PARTITION BY
          account_id,
          date_id
      ) AS rn
    FROM
      yinolink_data.etl_fb_accounts_spend
  ) AS tmp
WHERE
  rn = 1

可参考解析json:

SELECT
    JSONExtract(actions, 'Array(Tuple(1d_click Float, 1d_view Float, 7d_click Float, action_type String, value String))') AS x,
    arrayFilter(i -> tupleElement(i, 'action_type') = 'purchase', x) AS z,
    tupleElement(z[1], '1d_click') as value_1d_click
  FROM etl_fb_accounts_spend

 

解析json中的数字字符串:


CREATE VIEW
    yinolink_data.etl_vw_fb_account_daily_spent (`account_id` String, `spent` Float64, `clicks` Float64, `impressions` Float64, `reach` Float64, `7d_click_add_to_cart` Float64, `1d_click_add_to_cart` Float64, `1d_view_add_to_cart` Float64, `7d_click_checkout` Float64, `1d_click_checkout` Float64, `1d_view_checkout` Float64, `7d_click_purchase` Float64, `1d_click_purchase` Float64, `1d_view_purchase` Float64, `7d_click_purchase_value` Float64, `1d_click_purchase_value` Float64, `1d_view_purchase_value` Float64, `7d_click_install` Float64, `1d_click_install` Float64, `1d_view_install` Float64, `account_currency` String, `spent_time` DateTime, `created_time` DateTime) AS
Select
    toString(account_id) AS account_id,
        spend AS spent,
        clicks AS clicks,
        impressions AS impressions,
        reach AS reach,
        account_currency AS account_currency,
        up_at AS created_time,
        JSONExtract(actions, 'Array(Tuple(1d_click String, 1d_view String, 7d_click String, action_type String, value String))') AS action_format,
    
        arrayFilter(i -> tupleElement(i, 'action_type') = 'add_to_cart', action_format) AS add_to_cart,
        COALESCE(toFloat64OrNull(tupleElement(add_to_cart[1], '7d_click')), 0) AS `7d_click_add_to_cart`,
        COALESCE(toFloat64OrNull(tupleElement(add_to_cart[1], '1d_click')), 0) AS `1d_click_add_to_cart`,
        COALESCE(toFloat64OrNull(tupleElement(add_to_cart[1], '1d_view')), 0) AS `1d_view_add_to_cart`,
    
        arrayFilter(i -> tupleElement(i, 'action_type') = 'purchase', action_format) AS purchase,
        COALESCE(toFloat64OrNull(tupleElement(purchase[1], '7d_click')), 0) AS `7d_click_purchase`,
        COALESCE(toFloat64OrNull(tupleElement(purchase[1], '1d_click')), 0) AS `1d_click_purchase`,
        COALESCE(toFloat64OrNull(tupleElement(purchase[1], '1d_view')), 0) AS `1d_view_purchase`,
    
    
        arrayFilter(i -> tupleElement(i, 'action_type') = 'omni_initiated_checkout', action_format) AS checkout1,
        COALESCE(toFloat64OrNull(tupleElement(checkout1[1], '7d_click')), 0) as `7d_click_checkout1`,
        COALESCE(toFloat64OrNull(tupleElement(checkout1[1], '1d_click')), 0) as `1d_click_checkout1`,
        COALESCE(toFloat64OrNull(tupleElement(checkout1[1], '1d_view')), 0) as `1d_view_checkout1`,
        arrayFilter(i -> tupleElement(i, 'action_type') = 'initiated_checkout', action_format) AS checkout2,
        COALESCE(toFloat64OrNull(tupleElement(checkout2[1], '7d_click')), 0) as `7d_click_checkout2`,
        COALESCE(toFloat64OrNull(tupleElement(checkout2[1], '1d_click')), 0) as `1d_click_checkout2`,
        COALESCE(toFloat64OrNull(tupleElement(checkout2[1], '1d_view')), 0) as `1d_view_checkout2`,
    
        COALESCE(
            CASE WHEN `7d_click_checkout2` != 0 THEN `7d_click_checkout2` ELSE
            CASE WHEN `7d_click_checkout1` != 0 THEN `7d_click_checkout1` ELSE 0.0 END
            END, 0.0
        ) as `7d_click_checkout`,
        COALESCE(
            CASE WHEN `1d_click_checkout2` != 0 THEN `1d_click_checkout2` ELSE
                CASE WHEN `1d_click_checkout1` != 0 THEN `1d_click_checkout1` ELSE 0.0 END
            END, 0.0) as `1d_click_checkout`,
        COALESCE(
            CASE WHEN `1d_view_checkout2` != 0 THEN `1d_view_checkout2` ELSE
                CASE WHEN `1d_view_checkout1` != 0 THEN `1d_view_checkout1` ELSE 0.0 END
            END, 0.0) as `1d_view_checkout`,
    
        arrayFilter(i -> tupleElement(i, 'action_type') = 'omni_app_install', action_format) AS app_install1,
        COALESCE(toFloat64OrNull(tupleElement(app_install1[1], '7d_click')), 0) as `7d_click_install1`,
        COALESCE(toFloat64OrNull(tupleElement(app_install1[1], '1d_click')), 0) as `1d_click_install1`,
        COALESCE(toFloat64OrNull(tupleElement(app_install1[1], '1d_view')), 0) as `1d_view_install1`,
        arrayFilter(i -> tupleElement(i, 'action_type') = 'app_install', action_format) AS app_install2,
        COALESCE(toFloat64OrNull(tupleElement(app_install2[1], '7d_click')), 0) as `7d_click_install2`,
        COALESCE(toFloat64OrNull(tupleElement(app_install2[1], '1d_click')), 0) as `1d_click_install2`,
        COALESCE(toFloat64OrNull(tupleElement(app_install2[1], '1d_view')), 0) as `1d_view_install2`,
        COALESCE(
            CASE WHEN `7d_click_install2` != 0 THEN `7d_click_install2` ELSE
                CASE WHEN `7d_click_install1` != 0 THEN `7d_click_install1` ELSE 0.0 END
            END, 0.0) as `7d_click_install`,
        COALESCE(
            CASE WHEN `1d_click_install2` != 0 THEN `1d_click_install2` ELSE
                CASE WHEN `1d_click_install1` != 0 THEN `1d_click_install1` ELSE 0.0 END
            END, 0.0) as `1d_click_install`,
        COALESCE(
            CASE WHEN `1d_view_install2` != 0 THEN `1d_view_install2` ELSE
                CASE WHEN `1d_view_install1` != 0 THEN `1d_view_install1` ELSE 0.0 END
            END, 0.0) as `1d_view_install`,
    
        JSONExtract(action_values, 'Array(Tuple(1d_click String, 1d_view String, 7d_click String, action_type String, value String))') AS action_values_format,
        arrayFilter(i -> tupleElement(i, 'action_type') = 'purchase', action_values_format) AS value_purchase,
        COALESCE(toFloat64OrNull(tupleElement(value_purchase[1], '7d_click')), 0) as `7d_click_purchase_value`,
        COALESCE(toFloat64OrNull(tupleElement(value_purchase[1], '1d_click')), 0) as `1d_click_purchase_value`,
        COALESCE(toFloat64OrNull(tupleElement(value_purchase[1], '1d_view')), 0) as `1d_view_purchase_value`,
    
        toString(date_id) AS date_id_str,
        toDateTime(concat(substring(date_id_str, 1, 4), '-', substring(date_id_str, 5, 2), '-', substring(date_id_str, 7, 2), ' 00:00:00')) AS spent_time
FROM
  (
    SELECT
      *,
      row_number() OVER (
        PARTITION BY
          account_id,
          date_id
      ) AS rn
    FROM
      yinolink_data.etl_fb_accounts_spend
  ) AS tmp
WHERE
  rn = 1

 

 

参考:https://github.com/ClickHouse/ClickHouse/issues/46851

 

 

 

 

推荐文章

设置谷歌云服务器使用ssh密码方式远程连接服务器
2020-09-09
Linux查看IO占用过高的进程。
2021-11-22
Golang项目部署守护运行 使用bash脚本校验进程是否后台运行 并停止/启动/重启/编译运行go项目(后台守护运行)
2021-08-06
搜索文章