{"id":47193,"date":"2022-11-21T18:20:16","date_gmt":"2024-03-08T17:19:03","guid":{"rendered":"https:\/\/www.silicloud.com\/zh\/blog\/%e5%b0%9d%e8%af%95%e4%bd%bf%e7%94%a8delta-live-tables%e7%9a%84%e5%8f%98%e6%9b%b4%e6%95%b0%e6%8d%ae%e6%8d%95%e8%8e%b7%ef%bc%88cdc%ef%bc%89%e3%80%82\/"},"modified":"2024-04-29T01:49:20","modified_gmt":"2024-04-28T17:49:20","slug":"%e5%b0%9d%e8%af%95%e4%bd%bf%e7%94%a8delta-live-tables%e7%9a%84%e5%8f%98%e6%9b%b4%e6%95%b0%e6%8d%ae%e6%8d%95%e8%8e%b7%ef%bc%88cdc%ef%bc%89%e3%80%82","status":"publish","type":"post","link":"https:\/\/www.silicloud.com\/zh\/blog\/%e5%b0%9d%e8%af%95%e4%bd%bf%e7%94%a8delta-live-tables%e7%9a%84%e5%8f%98%e6%9b%b4%e6%95%b0%e6%8d%ae%e6%8d%95%e8%8e%b7%ef%bc%88cdc%ef%bc%89%e3%80%82\/","title":{"rendered":"\u5c1d\u8bd5\u4f7f\u7528Delta Live Tables\u7684\u53d8\u66f4\u6570\u636e\u6355\u83b7\uff08CDC\uff09"},"content":{"rendered":"<p>\u6211\u4f1a\u6d4f\u89c8\u8fd9\u91cc\u53d1\u5e03\u7684\u7b14\u8bb0\u672c\u3002\u751f\u6210\u53d1\u751f\u53d8\u5316\u7684\u6570\u636e\uff0c\u5e76\u4f7f\u7528Delta Live Tables\uff08DLT\uff09\u7684\u66f4\u6539\u6570\u636e\u6355\u83b7\uff08CDC\uff09\u5c06\u4ec5\u53d8\u66f4\u53cd\u6620\u5230\u540e\u7eed\u8868\u683c\u4e2d\u3002<\/p>\n<p>&nbsp;<\/p>\n<p>\u4ee5\u4e0b\u662f\u7ffb\u8bd1\u7248\u672c\u3002\u6211\u4eec\u53ea\u7ffb\u8bd1\u4e86Python\u7248\u672c\u7684\u7ba1\u9053\u7b14\u8bb0\u672c\u3002<\/p>\n<p>&nbsp;<\/p>\n<h1>\u6570\u636e\u51c6\u5907<\/h1>\n<p>\u4f7f\u7528Faker\u751f\u6210CDC\u7684\u57fa\u7840\u6570\u636e\u3002<\/p>\n<p>&nbsp;<\/p>\n<h2>Faker\u7684\u5b89\u88c5<\/h2>\n<pre class=\"post-pre\"><code>%pip install Faker\r\n<\/code><\/pre>\n<h2>\u751f\u6210\u865a\u62df\u6570\u636e<\/h2>\n<pre class=\"post-pre\"><code><span class=\"kn\">from<\/span> <span class=\"n\">pyspark.sql<\/span> <span class=\"kn\">import<\/span> <span class=\"n\">functions<\/span> <span class=\"k\">as<\/span> <span class=\"n\">F<\/span>\r\n<span class=\"kn\">from<\/span> <span class=\"n\">faker<\/span> <span 
class=\"kn\">import<\/span> <span class=\"n\">Faker<\/span>\r\n<span class=\"kn\">from<\/span> <span class=\"n\">collections<\/span> <span class=\"kn\">import<\/span> <span class=\"n\">OrderedDict<\/span> \r\n<span class=\"kn\">import<\/span> <span class=\"n\">uuid<\/span>\r\n\r\n<span class=\"c1\"># \u30c7\u30fc\u30bf\u306e\u683c\u7d0d\u30d1\u30b9: \u9069\u5b9c\u5909\u66f4\u3057\u3066\u304f\u3060\u3055\u3044\r\n<\/span><span class=\"n\">folder<\/span> <span class=\"o\">=<\/span> <span class=\"sh\">\"<\/span><span class=\"s\">\/tmp\/takaaki.yayoi@databricks.com\/demo\/cdc_raw<\/span><span class=\"sh\">\"<\/span>\r\n<span class=\"c1\">#dbutils.fs.rm(folder, True)\r\n<\/span><span class=\"k\">try<\/span><span class=\"p\">:<\/span>\r\n  <span class=\"n\">dbutils<\/span><span class=\"p\">.<\/span><span class=\"n\">fs<\/span><span class=\"p\">.<\/span><span class=\"nf\">ls<\/span><span class=\"p\">(<\/span><span class=\"n\">folder<\/span><span class=\"p\">)<\/span>\r\n<span class=\"k\">except<\/span><span class=\"p\">:<\/span>\r\n  <span class=\"nf\">print<\/span><span class=\"p\">(<\/span><span class=\"sh\">\"<\/span><span class=\"s\">\u30d5\u30a9\u30eb\u30c0\u30fc\u304c\u5b58\u5728\u3057\u307e\u305b\u3093\u3001\u30c7\u30fc\u30bf\u3092\u751f\u6210\u4e2d<\/span><span class=\"gp\">...<\/span><span class=\"sh\">\"<\/span><span class=\"s\">)  \r\n  fake = Faker()\r\n  fake_firstname = F.udf(fake.first_name)\r\n  fake_lastname = F.udf(fake.last_name)\r\n  fake_email = F.udf(fake.ascii_company_email)\r\n  fake_date = F.udf(lambda:fake.date_time_this_month().strftime(<\/span><span class=\"sh\">\"<\/span><span class=\"o\">%<\/span><span class=\"n\">m<\/span><span class=\"o\">-%<\/span><span class=\"n\">d<\/span><span class=\"o\">-%<\/span><span class=\"n\">Y<\/span> <span class=\"o\">%<\/span><span class=\"n\">H<\/span><span class=\"p\">:<\/span><span class=\"o\">%<\/span><span class=\"n\">M<\/span><span class=\"p\">:<\/span><span class=\"o\">%<\/span><span 
class=\"n\">S<\/span><span class=\"sh\">\"<\/span><span class=\"s\">))\r\n  fake_address = F.udf(fake.address)\r\n  operations = OrderedDict([(<\/span><span class=\"sh\">\"<\/span><span class=\"n\">APPEND<\/span><span class=\"sh\">\"<\/span><span class=\"s\">, 0.5),(<\/span><span class=\"sh\">\"<\/span><span class=\"n\">DELETE<\/span><span class=\"sh\">\"<\/span><span class=\"s\">, 0.1),(<\/span><span class=\"sh\">\"<\/span><span class=\"n\">UPDATE<\/span><span class=\"sh\">\"<\/span><span class=\"s\">, 0.3),(None, 0.01)])\r\n  fake_operation = F.udf(lambda:fake.random_elements(elements=operations, length=1)[0])\r\n  fake_id = F.udf(lambda: str(uuid.uuid4()))\r\n\r\n  df = spark.range(0, 100000)\r\n  df = df.withColumn(<\/span><span class=\"sh\">\"<\/span><span class=\"nb\">id<\/span><span class=\"sh\">\"<\/span><span class=\"s\">, fake_id())\r\n  df = df.withColumn(<\/span><span class=\"sh\">\"<\/span><span class=\"n\">firstname<\/span><span class=\"sh\">\"<\/span><span class=\"s\">, fake_firstname())\r\n  df = df.withColumn(<\/span><span class=\"sh\">\"<\/span><span class=\"n\">lastname<\/span><span class=\"sh\">\"<\/span><span class=\"s\">, fake_lastname())\r\n  df = df.withColumn(<\/span><span class=\"sh\">\"<\/span><span class=\"n\">email<\/span><span class=\"sh\">\"<\/span><span class=\"s\">, fake_email())\r\n  df = df.withColumn(<\/span><span class=\"sh\">\"<\/span><span class=\"n\">address<\/span><span class=\"sh\">\"<\/span><span class=\"s\">, fake_address())\r\n  df = df.withColumn(<\/span><span class=\"sh\">\"<\/span><span class=\"n\">operation<\/span><span class=\"sh\">\"<\/span><span class=\"s\">, fake_operation())\r\n  df = df.withColumn(<\/span><span class=\"sh\">\"<\/span><span class=\"n\">operation_date<\/span><span class=\"sh\">\"<\/span><span class=\"s\">, fake_date())\r\n\r\n  df.repartition(100).write.format(<\/span><span class=\"sh\">\"<\/span><span class=\"n\">json<\/span><span class=\"sh\">\"<\/span><span class=\"s\">).mode(<\/span><span 
class=\"sh\">\"<\/span><span class=\"n\">overwrite<\/span><span class=\"sh\">\"<\/span><span class=\"s\">).save(folder+<\/span><span class=\"sh\">\"<\/span><span class=\"o\">\/<\/span><span class=\"n\">customers<\/span><span class=\"sh\">\"<\/span><span class=\"s\">)\r\n  \r\n  df = spark.range(0, 10000)\r\n  df = df.withColumn(<\/span><span class=\"sh\">\"<\/span><span class=\"nb\">id<\/span><span class=\"sh\">\"<\/span><span class=\"s\">, fake_id())\r\n  df = df.withColumn(<\/span><span class=\"sh\">\"<\/span><span class=\"n\">transaction_date<\/span><span class=\"sh\">\"<\/span><span class=\"s\">, fake_date())\r\n  df = df.withColumn(<\/span><span class=\"sh\">\"<\/span><span class=\"n\">amount<\/span><span class=\"sh\">\"<\/span><span class=\"s\">, F.round(F.rand()*1000))\r\n  df = df.withColumn(<\/span><span class=\"sh\">\"<\/span><span class=\"n\">item_count<\/span><span class=\"sh\">\"<\/span><span class=\"s\">, F.round(F.rand()*10))\r\n  df = df.withColumn(<\/span><span class=\"sh\">\"<\/span><span class=\"n\">operation<\/span><span class=\"sh\">\"<\/span><span class=\"s\">, fake_operation())\r\n  df = df.withColumn(<\/span><span class=\"sh\">\"<\/span><span class=\"n\">operation_date<\/span><span class=\"sh\">\"<\/span><span class=\"s\">, fake_date())\r\n\r\n  # \u540c\u3058ID\u304c\u751f\u6210\u3055\u308c\u305f\u9867\u5ba2\u3068Join\r\n  df = df.withColumn(<\/span><span class=\"sh\">\"<\/span><span class=\"n\">t_id<\/span><span class=\"sh\">\"<\/span><span class=\"s\">, F.monotonically_increasing_id()).join(spark.read.json(folder+<\/span><span class=\"sh\">\"<\/span><span class=\"o\">\/<\/span><span class=\"n\">customers<\/span><span class=\"sh\">\"<\/span><span class=\"s\">).select(<\/span><span class=\"sh\">\"<\/span><span class=\"nb\">id<\/span><span class=\"sh\">\"<\/span><span class=\"s\">).withColumnRenamed(<\/span><span class=\"sh\">\"<\/span><span class=\"nb\">id<\/span><span class=\"sh\">\"<\/span><span class=\"s\">, <\/span><span 
class=\"sh\">\"<\/span><span class=\"n\">customer_id<\/span><span class=\"sh\">\"<\/span><span class=\"s\">).withColumn(<\/span><span class=\"sh\">\"<\/span><span class=\"n\">t_id<\/span><span class=\"sh\">\"<\/span><span class=\"s\">, F.monotonically_increasing_id()), <\/span><span class=\"sh\">\"<\/span><span class=\"n\">t_id<\/span><span class=\"sh\">\"<\/span><span class=\"s\">).drop(<\/span><span class=\"sh\">\"<\/span><span class=\"n\">t_id<\/span><span class=\"sh\">\"<\/span><span class=\"s\">)\r\n  df.repartition(10).write.format(<\/span><span class=\"sh\">\"<\/span><span class=\"n\">json<\/span><span class=\"sh\">\"<\/span><span class=\"s\">).mode(<\/span><span class=\"sh\">\"<\/span><span class=\"n\">overwrite<\/span><span class=\"sh\">\"<\/span><span class=\"s\">).save(folder+<\/span><span class=\"sh\">\"<\/span><span class=\"o\">\/<\/span><span class=\"n\">transactions<\/span><span class=\"sh\">\"<\/span><span class=\"s\">)\r\n<\/span><\/code><\/pre>\n<h2>\u786e\u8ba4\u6570\u636e<\/h2>\n<pre class=\"post-pre\"><code><span class=\"n\">spark<\/span><span class=\"p\">.<\/span><span class=\"n\">read<\/span><span class=\"p\">.<\/span><span class=\"nf\">json<\/span><span class=\"p\">(<\/span><span class=\"n\">folder<\/span><span class=\"o\">+<\/span><span class=\"sh\">\"<\/span><span class=\"s\">\/customers<\/span><span class=\"sh\">\"<\/span><span class=\"p\">).<\/span><span class=\"nf\">display<\/span><span class=\"p\">()<\/span>\r\n<\/code><\/pre>\n<div><img decoding=\"async\" class=\"post-images\" title=\"\" src=\"https:\/\/cdn.silicloud.com\/blog-img\/blog\/img\/657d7d1f913a08637a69ed74\/14-0.png\" alt=\"Screen Shot 2022-12-15 at 18.34.17.png\" \/><\/div>\n<h2>\u7ba1\u9053\u7684\u521d\u59cb\u914d\u7f6e\u8bbe\u7f6e<\/h2>\n<p>\u5728\u8fd9\u91cc\uff0c\u6211\u4eec\u5c06\u521d\u59cb\u5316\u7528\u4e8eDelta Live Tables\u7ba1\u9053\u7684\u6570\u636e\u5e93\u548c\u6587\u4ef6\u8def\u5f84\u3002<\/p>\n<pre class=\"post-pre\"><code><span class=\"c1\">-- 
\u30c7\u30fc\u30bf\u30d9\u30fc\u30b9\u306e\u4f5c\u6210: \u30d1\u30a4\u30d7\u30e9\u30a4\u30f3\u306e\u30bf\u30fc\u30b2\u30c3\u30c8\u306b\u6307\u5b9a\u3057\u3066\u304f\u3060\u3055\u3044<\/span>\r\n<span class=\"k\">DROP<\/span> <span class=\"k\">DATABASE<\/span> <span class=\"n\">IF<\/span> <span class=\"k\">EXISTS<\/span> <span class=\"n\">cdc_data_taka<\/span> <span class=\"k\">CASCADE<\/span><span class=\"p\">;<\/span>\r\n<span class=\"k\">CREATE<\/span> <span class=\"k\">DATABASE<\/span> <span class=\"n\">cdc_data_taka<\/span><span class=\"p\">;<\/span>\r\n<\/code><\/pre>\n<pre class=\"post-pre\"><code><span class=\"c1\"># DLT\u30d1\u30a4\u30d7\u30e9\u30a4\u30f3\u306e\u30b9\u30c8\u30ec\u30fc\u30b8\u306e\u521d\u671f\u5316: \u30d1\u30a4\u30d7\u30e9\u30a4\u30f3\u306e\u30b9\u30c8\u30ec\u30fc\u30b8\u306b\u6307\u5b9a\u3057\u3066\u304f\u3060\u3055\u3044\r\n<\/span><span class=\"n\">dbutils<\/span><span class=\"p\">.<\/span><span class=\"n\">fs<\/span><span class=\"p\">.<\/span><span class=\"nf\">rm<\/span><span class=\"p\">(<\/span><span class=\"sh\">\"<\/span><span class=\"s\">\/tmp\/takaaki.yayoi@databricks.com\/demo\/dlt_cdc<\/span><span class=\"sh\">\"<\/span><span class=\"p\">,<\/span> <span class=\"bp\">True<\/span><span class=\"p\">)<\/span>\r\n<\/code><\/pre>\n<h1>CDC\u7ba1\u7ebf\u7684\u5b9e\u65bd\u548c\u6267\u884c\u3002<\/h1>\n<p>&nbsp;<\/p>\n<p>\u6574\u4e2a\u6d41\u7a0b\u5982\u4e0b\u6240\u793a\u3002<\/p>\n<div><img decoding=\"async\" class=\"post-images\" title=\"\" src=\"https:\/\/cdn.silicloud.com\/blog-img\/blog\/img\/657d7d1f913a08637a69ed74\/22-0.png\" alt=\"\" \/><\/div>\n<h2>CDC\u7684\u91cd\u8981\u6027<\/h2>\n<p>Change Data 
Capture\uff08CDC\uff09\u662f\u6355\u83b7\u6570\u636e\u5e93\u6216\u6570\u636e\u4ed3\u5e93\u4e2d\u8bb0\u5f55\u66f4\u6539\u7684\u8fc7\u7a0b\u3002\u8fd9\u4e9b\u66f4\u6539\u901a\u5e38\u88ab\u89c6\u4e3a\u5220\u9664\u3001\u6dfb\u52a0\u548c\u66f4\u65b0\u7b49\u64cd\u4f5c\u3002<\/p>\n<p>\u4ece\u6570\u636e\u5e93\u4e2d\u5bfc\u51fa\u6570\u636e\u5e93\u8f6c\u50a8\uff0c\u7136\u540e\u5c06\u5176\u5bfc\u5165\u5230Lakehouse\/\u6570\u636e\u4ed3\u5e93\/\u6570\u636e\u6e56\u4e2d\u662f\u4e00\u79cd\u7b80\u5355\u7684\u6570\u636e\u590d\u5236\u65b9\u6cd5\uff0c\u4f46\u8fd9\u5e76\u4e0d\u662f\u53ef\u6269\u5c55\u7684\u65b9\u6cd5\u3002<\/p>\n<p>\u53ea\u6355\u6349\u6570\u636e\u5e93\u4e2d\u7684\u66f4\u6539\u5e76\u5c06\u8fd9\u4e9b\u66f4\u6539\u5e94\u7528\u4e8e\u76ee\u6807\u6570\u636e\u5e93\u624d\u662f\u53d8\u66f4\u6570\u636e\u6355\u6349\uff08CDC\uff09\u3002CDC\u53ef\u4ee5\u51cf\u5c11\u5f00\u9500\u5e76\u652f\u6301\u5b9e\u65f6\u5206\u6790\u3002\u5b83\u53ef\u4ee5\u5b9e\u73b0\u589e\u91cf\u52a0\u8f7d\u800c\u65e0\u9700\u8fdb\u884c\u6279\u91cf\u52a0\u8f7d\u3002<\/p>\n<h2>CDC\u7684\u65b9\u6cd5<\/h2>\n<p>1 &#8211; 
\u81ea\u884c\u5f00\u53d1\u5185\u90e8CDC\u6d41\u7a0b\u3002<\/p>\n<p>\u590d\u6742\u7684\u4efb\u52a1\uff1aCDC\u6570\u636e\u590d\u5236\u5e76\u4e0d\u662f\u4e00\u6b21\u6027\u7b80\u5355\u7684\u89e3\u51b3\u65b9\u6848\u3002\u7531\u4e8e\u6570\u636e\u5e93\u63d0\u4f9b\u5546\u4e4b\u95f4\u7684\u5dee\u5f02\uff0c\u8bb0\u5f55\u683c\u5f0f\u4e0d\u540c\uff0c\u4e14\u8bbf\u95ee\u65e5\u5fd7\u8bb0\u5f55\u4e0d\u4fbf\uff0c\u56e0\u6b64CDC\u53d8\u5f97\u56f0\u96be\u3002<\/p>\n<p>\u5b9a\u671f\u7ef4\u62a4\uff1a\u7f16\u5199CDC\u8fc7\u7a0b\u811a\u672c\u662f\u7b2c\u4e00\u6b65\u3002\u5fc5\u987b\u5bf9\u4e0a\u8ff0\u53d8\u5316\u8fdb\u884c\u5b9a\u671f\u6620\u5c04\u7684\u5b9a\u5236\u89e3\u51b3\u65b9\u6848\u9700\u8981\u8fdb\u884c\u7ef4\u62a4\uff0c\u8fd9\u9700\u8981\u5927\u91cf\u65f6\u95f4\u548c\u8d44\u6e90\u3002<\/p>\n<p>\u8fc7\u5ea6\u8d1f\u62c5\uff1a\u4f01\u4e1a\u7684\u5f00\u53d1\u4eba\u5458\u5df2\u7ecf\u9762\u4e34\u7740\u5bf9\u5b98\u65b9\u67e5\u8be2\u7684\u8fc7\u5206\u4f9d\u8d56\u3002\u6784\u5efa\u81ea\u5b9a\u4e49CDC\u89e3\u51b3\u65b9\u6848\u9700\u8981\u989d\u5916\u7684\u5de5\u4f5c\u91cf\uff0c\u8fd9\u53ef\u80fd\u4f1a\u5bf9\u6b63\u5728\u4ea7\u751f\u5229\u6da6\u7684\u9879\u76ee\u4ea7\u751f\u5f71\u54cd\u3002<\/p>\n<p>2 &#8211; CDC\u5de5\u5177\u7684\u8fd0\u7528\uff1aDebezium\u3001Hevo Data\u3001IBM Infosphere\u3001Qlik Replicate\u3001Talend\u3001Oracle 
GoldenGate\u3001StreamSets\u7b49\u3002<\/p>\n<p>\u5728\u8fd9\u4e2a\u6f14\u793a\u4ee3\u7801\u5e93\u4e2d\uff0c\u5c06\u5229\u7528CDC\u5de5\u5177\u4f20\u9001\u8fc7\u6765\u7684CDC\u6570\u636e\u3002CDC\u5de5\u5177\u80fd\u591f\u8bfb\u53d6\u6570\u636e\u5e93\u65e5\u5fd7\uff0c\u56e0\u6b64\u5728\u66f4\u65b0\u7279\u5b9a\u5217\u65f6\u4e0d\u9700\u8981\u5f00\u53d1\u8005\u7684\u5e2e\u52a9\u3002<\/p>\n<p>\u50cfDebezium\u8fd9\u6837\u7684CDC\u5de5\u5177\u53ef\u4ee5\u6355\u83b7\u6240\u6709\u5df2\u66f4\u6539\u7684\u884c\u3002\u5728Kafka\u65e5\u5fd7\u4e2d\uff0c\u5e94\u7528\u7a0b\u5e8f\u4f1a\u8bb0\u5f55\u81ea\u5f00\u59cb\u4f7f\u7528\u4ee5\u6765\u7684\u6570\u636e\u66f4\u6539\u5386\u53f2\u3002<\/p>\n<h2>\u5982\u4f55\u5c06\u60a8\u7684SQL\u6570\u636e\u5e93\u4e0eLakehouse\u540c\u6b65\uff1f<\/h2>\n<p>\u4f7f\u7528CDC\u5de5\u5177\u3001\u81ea\u52a8\u52a0\u8f7d\u7a0b\u5e8f\u3001DLT\u7ba1\u9053\u6765\u5b9e\u65bdCDC\u6d41\u7a0b\u3002<\/p>\n<ul class=\"post-ul\">\n<li style=\"list-style-type: none;\">\n<ul class=\"post-ul\">CDC\u30c4\u30fc\u30eb\u304c\u30c7\u30fc\u30bf\u30d9\u30fc\u30b9\u30ed\u30b0\u3092\u8aad\u307f\u8fbc\u307f\u3001\u5909\u66f4\u3092\u542b\u3080JSON\u30e1\u30c3\u30bb\u30fc\u30b8\u3092\u751f\u6210\u3057\u3001Kafka\u306b\u5bfe\u3057\u3066\u5909\u66f4\u8aac\u660e\u3092\u4f34\u3046\u30ec\u30b3\u30fc\u30c9\u3092\u30b9\u30c8\u30ea\u30fc\u30df\u30f3\u30b0<\/ul>\n<\/li>\n<\/ul>\n<p>&nbsp;<\/p>\n<ul class=\"post-ul\">\n<li style=\"list-style-type: none;\">\n<ul class=\"post-ul\">Kafka\u304cINSERT, UPDATE, DELETE\u30aa\u30da\u30ec\u30fc\u30b7\u30e7\u30f3\u3092\u542b\u3080\u30e1\u30c3\u30bb\u30fc\u30b8\u3092\u30b9\u30c8\u30ea\u30fc\u30df\u30f3\u30b0\u3057\u3001\u30af\u30e9\u30a6\u30c9\u30aa\u30d6\u30b8\u30a7\u30af\u30c8\u30b9\u30c8\u30ec\u30fc\u30b8(S3\u3001ADLS\u306a\u3069)\u306b\u683c\u7d0d<\/ul>\n<\/li>\n<\/ul>\n<p>&nbsp;<\/p>\n<ul class=\"post-ul\">\n<li style=\"list-style-type: none;\">\n<ul class=\"post-ul\">Auto 
Loader\u3092\u7528\u3044\u3066\u30af\u30e9\u30a6\u30c9\u30aa\u30d6\u30b8\u30a7\u30af\u30c8\u30b9\u30c8\u30ec\u30fc\u30b8\u304b\u3089\u30e1\u30c3\u30bb\u30fc\u30b8\u3092\u30a4\u30f3\u30af\u30ea\u30e1\u30f3\u30bf\u30eb\u306b\u30ed\u30fc\u30c9\u3057\u3001\u751f\u306e\u30e1\u30c3\u30bb\u30fc\u30b8\u3068\u3057\u3066\u4fdd\u5b58\u3059\u308b\u305f\u3081\u306b\u30d6\u30ed\u30f3\u30ba\u30c6\u30fc\u30d6\u30eb\u306b\u683c\u7d0d<\/ul>\n<\/li>\n<\/ul>\n<p>&nbsp;<\/p>\n<ul class=\"post-ul\">\u6b21\u306b\u3001\u30af\u30ec\u30f3\u30b8\u30f3\u30b0\u3055\u308c\u305f\u30d6\u30ed\u30f3\u30ba\u30ec\u30a4\u30e4\u30fc\u30c6\u30fc\u30d6\u30eb\u306b APPLY CHANGES INTO \u3092\u5b9f\u884c\u3057\u3001\u5f8c\u6bb5\u306e\u30b7\u30eb\u30d0\u30fc\u30c6\u30fc\u30d6\u30eb\u306b\u6700\u65b0\u306e\u66f4\u65b0\u30c7\u30fc\u30bf\u3092\u4f1d\u642c<\/ul>\n<p>\u4ee5\u4e0b\u662f\u5904\u7406\u6765\u81ea\u5916\u90e8\u6570\u636e\u5e93\u7684CDC\u6570\u636e\u7684\u5b9e\u73b0\u3002\u8bf7\u6ce8\u610f\u8f93\u5165\u53ef\u4ee5\u662f\u4efb\u4f55\u683c\u5f0f\uff0c\u5305\u62ec\u50cfKafka\u8fd9\u6837\u7684\u6d88\u606f\u961f\u5217\u3002<\/p>\n<div><img decoding=\"async\" class=\"post-images\" title=\"\" src=\"https:\/\/cdn.silicloud.com\/blog-img\/blog\/img\/657d7d1f913a08637a69ed74\/39-0.png\" alt=\"Make all your data ready for BI and ML\" \/><\/div>\n<h2>\u50cfDebezium\u8fd9\u6837\u7684CDC\u5de5\u5177\u7684\u8f93\u51fa\u662f\u600e\u6837\u7684\uff1f<\/h2>\n<p>\u8868\u8fbe\u53d8\u66f4\u6570\u636e\u7684JSON\u6d88\u606f\u5177\u6709\u4e0e\u4ee5\u4e0b\u5217\u8868\u76f8\u4f3c\u7684\u6709\u8da3\u5b57\u6bb5\uff1a<\/p>\n<ul class=\"post-ul\">\n<li style=\"list-style-type: none;\">\n<ul class=\"post-ul\">operation: \u30aa\u30da\u30ec\u30fc\u30b7\u30e7\u30f3\u306e\u30b3\u30fc\u30c9(DELETE, APPEND, UPDATE, CREATE)<\/ul>\n<\/li>\n<\/ul>\n<p>&nbsp;<\/p>\n<ul class=\"post-ul\">operation_date: 
\u305d\u308c\u305e\u308c\u306e\u30aa\u30da\u30ec\u30fc\u30b7\u30e7\u30f3\u306e\u30a2\u30af\u30b7\u30e7\u30f3\u304c\u3042\u3063\u305f\u65e5\u4ed8\u3001\u30bf\u30a4\u30e0\u30b9\u30bf\u30f3\u30d7<\/ul>\n<p>Debezium\u7684\u8f93\u51fa\u5305\u542b\u4ee5\u4e0b\u5b57\u6bb5\uff08\u6b64\u6f14\u793a\u4e2d\u4e0d\u5305\u542b\uff09\uff1a<\/p>\n<ul class=\"post-ul\">\n<li style=\"list-style-type: none;\">\n<ul class=\"post-ul\">before: \u5909\u66f4\u524d\u306e\u884c<\/ul>\n<\/li>\n<\/ul>\n<p>&nbsp;<\/p>\n<ul class=\"post-ul\">after: \u5909\u66f4\u5f8c\u306e\u884c<\/ul>\n<p>\u8bf7\u67e5\u770b\u8fd9\u4e2a\u53c2\u8003\u8d44\u6599\u4ee5\u4e86\u89e3\u53ef\u80fd\u7684\u5b57\u6bb5\u3002<\/p>\n<h2>\u4f7f\u7528Auto Loader\uff08cloudFiles\uff09\u6765\u5b9e\u73b0\u589e\u91cf\u6570\u636e\u52a0\u8f7d\u3002<\/h2>\n<div><img decoding=\"async\" class=\"post-images\" title=\"\" src=\"https:\/\/cdn.silicloud.com\/blog-img\/blog\/img\/657d7d1f913a08637a69ed74\/47-0.png\" alt=\"\" \/><\/div>\n<p>\u901a\u8fc7\u6a21\u5f0f\u7684\u66f4\u65b0\uff0c\u4e0e\u5916\u90e8\u7cfb\u7edf\u7684\u534f\u8c03\u53ef\u80fd\u53d8\u5f97\u56f0\u96be\u3002\u5916\u90e8\u6570\u636e\u5e93\u4f1a\u8fdb\u884c\u6a21\u5f0f\u7684\u66f4\u65b0\u548c\u5217\u7684\u6dfb\u52a0\u3001\u66f4\u65b0\uff0c\u6211\u4eec\u7684\u7cfb\u7edf\u9700\u8981\u5bf9\u8fd9\u4e9b\u53d8\u5316\u5177\u6709\u5f3a\u5927\u7684\u9002\u5e94\u6027\u3002Databricks\u7684\u81ea\u52a8\u52a0\u8f7d\u5668\uff08cloudFiles\uff09\u53ef\u4ee5\u7acb\u5373\u5904\u7406\u6a21\u5f0f\u7684\u63a8\u65ad\u548c\u6f14\u5316\u3002<\/p>\n<p>\u901a\u8fc7\u4f7f\u7528Auto Loader\uff0c\u53ef\u4ee5\u4ece\u4e91\u5b58\u50a8\u4e2d\u5bfc\u5165\u6570\u767e\u4e07\u4e2a\u6587\u4ef6\uff0c\u5e76\u652f\u6301\u5927\u89c4\u6a21\u7684\u6a21\u5f0f\u63a8\u65ad\u548c\u6f14\u5316\u3002\u5728\u8fd9\u4e2a\u7b14\u8bb0\u672c\u4e2d\uff0c\u6211\u4eec\u5c06\u5229\u7528Auto Loader\u6765\u5904\u7406\u6d41\u6570\u636e\uff08\u548c\u6279\u91cf\u6570\u636e\uff09\u3002<\/p>\n<p>\u6211\u4eec\u53ef\u4ee5\u4f7f\u7528Auto 
Loader\u6765\u521b\u5efa\u4e00\u4e2a\u6d41\u6c34\u7ebf\uff0c\u4ee5\u4fbf\u5bfc\u5165\u7531\u5916\u90e8\u63d0\u4f9b\u5546\u63d0\u4f9b\u7684\u539f\u59cbJSON\u6570\u636e\u3002<\/p>\n<h2>Delta Live Tables\uff08DLT\uff09\u7684Python\u8bed\u6cd5<\/h2>\n<p>\u4e3a\u4e86\u4f7f\u7528\u76f8\u5173\u65b9\u6cd5\uff0c\u60a8\u9700\u8981\u5bfc\u5165dlt Python\u6a21\u5757\u3002\u5728\u8fd9\u91cc\uff0c\u6211\u4eec\u4e5f\u5bfc\u5165\u4e86pyspark.sql.functions\u3002<\/p>\n<p>DLT\u7684\u8868\u683c\u3001\u89c6\u56fe\u548c\u5173\u8054\u8bbe\u7f6e\u662f\u4f7f\u7528\u88c5\u9970\u5668\u8fdb\u884c\u8bbe\u7f6e\u7684\u3002<\/p>\n<p>\u5982\u679c\u4f60\u6ca1\u6709\u63a5\u89e6\u8fc7Python\u7684\u88c5\u9970\u5668\uff0c\u90a3\u4e48\u4f60\u53ef\u4ee5\u5c06\u5176\u89c6\u4e3a\u4ee5@\u5f00\u5934\u3001\u4f5c\u7528\u4e8ePython\u811a\u672c\u4e2d\u7d27\u968f\u5176\u540e\u7684\u51fd\u6570\u7684\u51fd\u6570\u6216\u7c7b\u3002<\/p>\n<p>@dlt.table\u88c5\u9970\u5668\u662f\u5c06Python\u51fd\u6570\u8f6c\u6362\u4e3aDelta Live\u8868\u7684\u57fa\u672c\u65b9\u6cd5\u3002<\/p>\n<p>\u4e0b\u9762\u6211\u4eec\u5c06\u63a2\u7d22\u4f20\u5165\u7684\u6570\u636e\u3002<\/p>\n<h3>\u9752\u94dc\u8868 &#8211; Auto Loader\u548cDLT<\/h3>\n<pre class=\"post-pre\"><code><span class=\"c1\">## \u30b9\u30c8\u30ec\u30fc\u30b8\u30d1\u30b9\u304b\u3089\u53d6\u5f97\u3059\u308b\u751f\u306eJSON\u30c7\u30fc\u30bf\u3092\u542b\u3080\u30d6\u30ed\u30f3\u30ba\u30c6\u30fc\u30d6\u30eb\u306e\u4f5c\u6210\r\n<\/span><span class=\"kn\">import<\/span> <span class=\"n\">dlt<\/span>\r\n<span class=\"kn\">from<\/span> <span class=\"n\">pyspark.sql.functions<\/span> <span class=\"kn\">import<\/span> <span class=\"o\">*<\/span>\r\n<span class=\"kn\">from<\/span> <span class=\"n\">pyspark.sql.types<\/span> <span class=\"kn\">import<\/span> <span class=\"o\">*<\/span>\r\n\r\n<span class=\"n\">source<\/span> <span class=\"o\">=<\/span> <span 
class=\"n\">spark<\/span><span class=\"p\">.<\/span><span class=\"n\">conf<\/span><span class=\"p\">.<\/span><span class=\"nf\">get<\/span><span class=\"p\">(<\/span><span class=\"sh\">\"<\/span><span class=\"s\">source<\/span><span class=\"sh\">\"<\/span><span class=\"p\">)<\/span>\r\n\r\n<span class=\"nd\">@dlt.table<\/span><span class=\"p\">(<\/span><span class=\"n\">name<\/span><span class=\"o\">=<\/span><span class=\"sh\">\"<\/span><span class=\"s\">customer_bronze<\/span><span class=\"sh\">\"<\/span><span class=\"p\">,<\/span>\r\n                  <span class=\"n\">comment<\/span> <span class=\"o\">=<\/span> <span class=\"sh\">\"<\/span><span class=\"s\">\u30af\u30e9\u30a6\u30c9\u30aa\u30d6\u30b8\u30a7\u30af\u30c8\u30b9\u30c8\u30ec\u30fc\u30b8\u306e\u30e9\u30f3\u30c7\u30a3\u30f3\u30b0\u30be\u30fc\u30f3\u304b\u3089\u30a4\u30f3\u30af\u30ea\u30e1\u30f3\u30bf\u30eb\u306b\u53d6\u308a\u8fbc\u307e\u308c\u308b\u65b0\u898f\u9867\u5ba2<\/span><span class=\"sh\">\"<\/span><span class=\"p\">,<\/span>\r\n  <span class=\"n\">table_properties<\/span><span class=\"o\">=<\/span><span class=\"p\">{<\/span>\r\n    <span class=\"sh\">\"<\/span><span class=\"s\">quality<\/span><span class=\"sh\">\"<\/span><span class=\"p\">:<\/span> <span class=\"sh\">\"<\/span><span class=\"s\">bronze<\/span><span class=\"sh\">\"<\/span>\r\n  <span class=\"p\">}<\/span>\r\n<span class=\"p\">)<\/span>\r\n\r\n<span class=\"k\">def<\/span> <span class=\"nf\">customer_bronze<\/span><span class=\"p\">():<\/span>\r\n  <span class=\"nf\">return <\/span><span class=\"p\">(<\/span>\r\n    <span class=\"n\">spark<\/span><span class=\"p\">.<\/span><span class=\"n\">readStream<\/span><span class=\"p\">.<\/span><span class=\"nf\">format<\/span><span class=\"p\">(<\/span><span class=\"sh\">\"<\/span><span class=\"s\">cloudFiles<\/span><span class=\"sh\">\"<\/span><span class=\"p\">)<\/span> \\\r\n      <span class=\"p\">.<\/span><span class=\"nf\">option<\/span><span class=\"p\">(<\/span><span 
class=\"sh\">\"<\/span><span class=\"s\">cloudFiles.format<\/span><span class=\"sh\">\"<\/span><span class=\"p\">,<\/span> <span class=\"sh\">\"<\/span><span class=\"s\">json<\/span><span class=\"sh\">\"<\/span><span class=\"p\">)<\/span> \\\r\n      <span class=\"p\">.<\/span><span class=\"nf\">option<\/span><span class=\"p\">(<\/span><span class=\"sh\">\"<\/span><span class=\"s\">cloudFiles.inferColumnTypes<\/span><span class=\"sh\">\"<\/span><span class=\"p\">,<\/span> <span class=\"sh\">\"<\/span><span class=\"s\">true<\/span><span class=\"sh\">\"<\/span><span class=\"p\">)<\/span> \\\r\n      <span class=\"p\">.<\/span><span class=\"nf\">load<\/span><span class=\"p\">(<\/span><span class=\"sa\">f<\/span><span class=\"sh\">\"<\/span><span class=\"si\">{<\/span><span class=\"n\">source<\/span><span class=\"si\">}<\/span><span class=\"s\">\/customers<\/span><span class=\"sh\">\"<\/span><span class=\"p\">)<\/span>\r\n  <span class=\"p\">)<\/span>\r\n<\/code><\/pre>\n<h3>\u94f6\u5c42 &#8211; \u6e05\u6d17\u540e\u7684\u8868\uff08\u5e94\u7528\u7ea6\u675f\uff09<\/h3>\n<pre class=\"post-pre\"><code><span class=\"nd\">@dlt.table<\/span><span class=\"p\">(<\/span><span class=\"n\">name<\/span><span class=\"o\">=<\/span><span class=\"sh\">\"<\/span><span class=\"s\">customer_bronze_clean<\/span><span class=\"sh\">\"<\/span><span class=\"p\">,<\/span>\r\n  <span class=\"n\">comment<\/span><span class=\"o\">=<\/span><span class=\"sh\">\"<\/span><span class=\"s\">\u30af\u30ec\u30f3\u30b8\u30f3\u30b0\u3055\u308c\u305f\u30d6\u30ed\u30f3\u30ba\u9867\u5ba2\u30d3\u30e5\u30fc(\u30b7\u30eb\u30d0\u30fc\u306b\u306a\u308b\u30c6\u30fc\u30d6\u30eb\u3067\u3059)<\/span><span class=\"sh\">\"<\/span><span class=\"p\">)<\/span>\r\n\r\n<span class=\"nd\">@dlt.expect_or_drop<\/span><span class=\"p\">(<\/span><span class=\"sh\">\"<\/span><span class=\"s\">valid_id<\/span><span class=\"sh\">\"<\/span><span class=\"p\">,<\/span> <span class=\"sh\">\"<\/span><span 
class=\"s\">id IS NOT NULL<\/span><span class=\"sh\">\"<\/span><span class=\"p\">)<\/span>\r\n<span class=\"nd\">@dlt.expect<\/span><span class=\"p\">(<\/span><span class=\"sh\">\"<\/span><span class=\"s\">valid_address<\/span><span class=\"sh\">\"<\/span><span class=\"p\">,<\/span> <span class=\"sh\">\"<\/span><span class=\"s\">address IS NOT NULL<\/span><span class=\"sh\">\"<\/span><span class=\"p\">)<\/span>\r\n<span class=\"nd\">@dlt.expect_or_drop<\/span><span class=\"p\">(<\/span><span class=\"sh\">\"<\/span><span class=\"s\">valid_operation<\/span><span class=\"sh\">\"<\/span><span class=\"p\">,<\/span> <span class=\"sh\">\"<\/span><span class=\"s\">operation IS NOT NULL<\/span><span class=\"sh\">\"<\/span><span class=\"p\">)<\/span>\r\n\r\n<span class=\"k\">def<\/span> <span class=\"nf\">customer_bronze_clean<\/span><span class=\"p\">():<\/span>\r\n  <span class=\"k\">return<\/span> <span class=\"n\">dlt<\/span><span class=\"p\">.<\/span><span class=\"nf\">read_stream<\/span><span class=\"p\">(<\/span><span class=\"sh\">\"<\/span><span class=\"s\">customer_bronze<\/span><span class=\"sh\">\"<\/span><span class=\"p\">)<\/span> \\\r\n            <span class=\"p\">.<\/span><span class=\"nf\">select<\/span><span class=\"p\">(<\/span><span class=\"sh\">\"<\/span><span class=\"s\">address<\/span><span class=\"sh\">\"<\/span><span class=\"p\">,<\/span> <span class=\"sh\">\"<\/span><span class=\"s\">email<\/span><span class=\"sh\">\"<\/span><span class=\"p\">,<\/span> <span class=\"sh\">\"<\/span><span class=\"s\">id<\/span><span class=\"sh\">\"<\/span><span class=\"p\">,<\/span> <span class=\"sh\">\"<\/span><span class=\"s\">firstname<\/span><span class=\"sh\">\"<\/span><span class=\"p\">,<\/span> <span class=\"sh\">\"<\/span><span class=\"s\">lastname<\/span><span class=\"sh\">\"<\/span><span class=\"p\">,<\/span> <span class=\"sh\">\"<\/span><span class=\"s\">operation<\/span><span class=\"sh\">\"<\/span><span class=\"p\">,<\/span> <span 
class=\"sh\">\"<\/span><span class=\"s\">operation_date<\/span><span class=\"sh\">\"<\/span><span class=\"p\">,<\/span> <span class=\"sh\">\"<\/span><span class=\"s\">_rescued_data<\/span><span class=\"sh\">\"<\/span><span class=\"p\">)<\/span>\r\n<\/code><\/pre>\n<h2>\u94f6\u8868\u7684\u7269\u5316<\/h2>\n<div><img decoding=\"async\" class=\"post-images\" title=\"\" src=\"https:\/\/cdn.silicloud.com\/blog-img\/blog\/img\/657d7d1f913a08637a69ed74\/62-0.png\" alt=\"Make all your data ready for BI and ML\" \/><\/div>\n<p>\u5728\u540d\u4e3acustomer_silver\u7684\u94f6\u8272\u8868\u4e2d\uff0c\u5305\u542b\u4e86\u6700\u65b0\u7684\u89c6\u56fe\u3002\u5b83\u662f\u539f\u59cb\u8868\u7684\u526f\u672c\u3002<\/p>\n<p>\u5728DLT\u7ba1\u9053\u8bbe\u7f6e\u4e2d\u6dfb\u52a0\u5e76\u542f\u7528applyChanges\u914d\u7f6e\uff0c\u4ee5\u660e\u786e\u542f\u7528\u6b64\u529f\u80fd\uff0c\u4ece\u800c\u5c06Apply Changes Into\u64cd\u4f5c\u4f20\u64ad\u5230\u540e\u7eed\u7684\u94f6\u5c42\u3002<\/p>\n<h3>\u5220\u9664\u4e0d\u5fc5\u8981\u7684\u5ba2\u6237\u8bb0\u5f55 &#8211; \u94f6\u8272\u8868 &#8211; \u4f7f\u7528 DLT Python<\/h3>\n<pre class=\"post-pre\"><code><span class=\"n\">dlt<\/span><span class=\"p\">.<\/span><span class=\"nf\">create_target_table<\/span><span class=\"p\">(<\/span><span class=\"n\">name<\/span><span class=\"o\">=<\/span><span class=\"sh\">\"<\/span><span class=\"s\">customer_silver<\/span><span class=\"sh\">\"<\/span><span class=\"p\">,<\/span>\r\n  <span class=\"n\">comment<\/span><span class=\"o\">=<\/span><span class=\"sh\">\"<\/span><span class=\"s\">\u30af\u30ec\u30f3\u30b8\u30f3\u30b0\u3001\u30de\u30fc\u30b8\u3055\u308c\u305f\u9867\u5ba2<\/span><span class=\"sh\">\"<\/span><span class=\"p\">,<\/span>\r\n  <span class=\"n\">table_properties<\/span><span class=\"o\">=<\/span><span class=\"p\">{<\/span>\r\n    <span class=\"sh\">\"<\/span><span class=\"s\">quality<\/span><span class=\"sh\">\"<\/span><span class=\"p\">:<\/span> <span class=\"sh\">\"<\/span><span 
class=\"s\">silver<\/span><span class=\"sh\">\"<\/span>\r\n  <span class=\"p\">}<\/span>\r\n<span class=\"p\">)<\/span>\r\n<\/code><\/pre>\n<pre class=\"post-pre\"><code><span class=\"n\">dlt<\/span><span class=\"p\">.<\/span><span class=\"nf\">apply_changes<\/span><span class=\"p\">(<\/span>\r\n  <span class=\"n\">target<\/span> <span class=\"o\">=<\/span> <span class=\"sh\">\"<\/span><span class=\"s\">customer_silver<\/span><span class=\"sh\">\"<\/span><span class=\"p\">,<\/span> <span class=\"c1\"># \u30de\u30c6\u30ea\u30a2\u30e9\u30a4\u30ba\u3055\u308c\u308b\u9867\u5ba2\u30c6\u30fc\u30d6\u30eb\r\n<\/span>  <span class=\"n\">source<\/span> <span class=\"o\">=<\/span> <span class=\"sh\">\"<\/span><span class=\"s\">customer_bronze_clean<\/span><span class=\"sh\">\"<\/span><span class=\"p\">,<\/span> <span class=\"c1\"># \u5165\u529b\u306eCDC\r\n<\/span>  <span class=\"n\">keys<\/span> <span class=\"o\">=<\/span> <span class=\"p\">[<\/span><span class=\"sh\">\"<\/span><span class=\"s\">id<\/span><span class=\"sh\">\"<\/span><span class=\"p\">],<\/span> <span class=\"c1\"># upsert\/delete\u3059\u308b\u305f\u3081\u306b\u884c\u3092\u30de\u30c3\u30c1\u3059\u308b\u969b\u306e\u4e3b\u30ad\u30fc\r\n<\/span>  <span class=\"n\">sequence_by<\/span> <span class=\"o\">=<\/span> <span class=\"nf\">col<\/span><span class=\"p\">(<\/span><span class=\"sh\">\"<\/span><span class=\"s\">operation_date<\/span><span class=\"sh\">\"<\/span><span class=\"p\">),<\/span> <span class=\"c1\"># \u6700\u65b0\u306e\u5024\u3092\u53d6\u5f97\u3059\u308b\u305f\u3081\u306b\u30aa\u30da\u30ec\u30fc\u30b7\u30e7\u30f3\u65e5\u306b\u3088\u308b\u91cd\u8907\u6392\u9664\r\n<\/span>  <span class=\"n\">apply_as_deletes<\/span> <span class=\"o\">=<\/span> <span class=\"nf\">expr<\/span><span class=\"p\">(<\/span><span class=\"sh\">\"<\/span><span class=\"s\">operation = <\/span><span class=\"sh\">'<\/span><span class=\"s\">DELETE<\/span><span class=\"sh\">'\"<\/span><span class=\"p\">),<\/span> <span 
class=\"c1\"># DELETE\u306e\u6761\u4ef6\r\n<\/span>  <span class=\"n\">except_column_list<\/span> <span class=\"o\">=<\/span> <span class=\"p\">[<\/span><span class=\"sh\">\"<\/span><span class=\"s\">operation<\/span><span class=\"sh\">\"<\/span><span class=\"p\">,<\/span> <span class=\"sh\">\"<\/span><span class=\"s\">operation_date<\/span><span class=\"sh\">\"<\/span><span class=\"p\">,<\/span> <span class=\"sh\">\"<\/span><span class=\"s\">_rescued_data<\/span><span class=\"sh\">\"<\/span><span class=\"p\">]<\/span> <span class=\"c1\"># \u30e1\u30bf\u30c7\u30fc\u30bf\u30ab\u30e9\u30e0\u306e\u524a\u9664\r\n<\/span><span class=\"p\">)<\/span>\r\n<\/code><\/pre>\n<h2>\u521b\u5efa\u7ba1\u9053<\/h2>\n<div><img decoding=\"async\" class=\"post-images\" title=\"\" src=\"https:\/\/cdn.silicloud.com\/blog-img\/blog\/img\/657d7d1f913a08637a69ed74\/69-1.png\" alt=\"Screen Shot 2022-12-15 at 18.45.21.png\" \/><\/div>\n<p>\u4ee5\u4e0b\u662f\u672c\u6b21\u8bbe\u7f6e\u7684JSON\u793a\u4f8b\u3002<\/p>\n<pre class=\"post-pre\"><code><span class=\"p\">{<\/span>\r\n    <span class=\"nl\">\"id\"<\/span><span class=\"p\">:<\/span> <span class=\"s2\">\"7c2607ec-47a2-48d9-8572-9e498656b573\"<\/span><span class=\"p\">,<\/span>\r\n    <span class=\"nl\">\"clusters\"<\/span><span class=\"p\">:<\/span> <span class=\"p\">[<\/span>\r\n        <span class=\"p\">{<\/span>\r\n            <span class=\"nl\">\"label\"<\/span><span class=\"p\">:<\/span> <span class=\"s2\">\"default\"<\/span><span class=\"p\">,<\/span>\r\n            <span class=\"nl\">\"autoscale\"<\/span><span class=\"p\">:<\/span> <span class=\"p\">{<\/span>\r\n                <span class=\"nl\">\"min_workers\"<\/span><span class=\"p\">:<\/span> <span class=\"mi\">1<\/span><span class=\"p\">,<\/span>\r\n                <span class=\"nl\">\"max_workers\"<\/span><span class=\"p\">:<\/span> <span class=\"mi\">5<\/span><span class=\"p\">,<\/span>\r\n                <span class=\"nl\">\"mode\"<\/span><span class=\"p\">:<\/span> <span 
class=\"s2\">\"ENHANCED\"<\/span>\r\n            <span class=\"p\">}<\/span>\r\n        <span class=\"p\">}<\/span>\r\n    <span class=\"p\">],<\/span>\r\n    <span class=\"nl\">\"development\"<\/span><span class=\"p\">:<\/span> <span class=\"kc\">true<\/span><span class=\"p\">,<\/span>\r\n    <span class=\"nl\">\"continuous\"<\/span><span class=\"p\">:<\/span> <span class=\"kc\">false<\/span><span class=\"p\">,<\/span>\r\n    <span class=\"nl\">\"channel\"<\/span><span class=\"p\">:<\/span> <span class=\"s2\">\"CURRENT\"<\/span><span class=\"p\">,<\/span>\r\n    <span class=\"nl\">\"edition\"<\/span><span class=\"p\">:<\/span> <span class=\"s2\">\"ADVANCED\"<\/span><span class=\"p\">,<\/span>\r\n    <span class=\"nl\">\"photon\"<\/span><span class=\"p\">:<\/span> <span class=\"kc\">false<\/span><span class=\"p\">,<\/span>\r\n    <span class=\"nl\">\"libraries\"<\/span><span class=\"p\">:<\/span> <span class=\"p\">[<\/span>\r\n        <span class=\"p\">{<\/span>\r\n            <span class=\"nl\">\"notebook\"<\/span><span class=\"p\">:<\/span> <span class=\"p\">{<\/span>\r\n                <span class=\"nl\">\"path\"<\/span><span class=\"p\">:<\/span> <span class=\"s2\">\"\/Users\/takaaki.yayoi@databricks.com\/20221212_dlt_cdc\/2-Retail_DLT_CDC_Python\"<\/span>\r\n            <span class=\"p\">}<\/span>\r\n        <span class=\"p\">}<\/span>\r\n    <span class=\"p\">],<\/span>\r\n    <span class=\"nl\">\"name\"<\/span><span class=\"p\">:<\/span> <span class=\"s2\">\"dlt_cdc_retail\"<\/span><span class=\"p\">,<\/span>\r\n    <span class=\"nl\">\"storage\"<\/span><span class=\"p\">:<\/span> <span class=\"s2\">\"\/tmp\/takaaki.yayoi@databricks.com\/demo\/dlt_cdc\"<\/span><span class=\"p\">,<\/span>\r\n    <span class=\"nl\">\"configuration\"<\/span><span class=\"p\">:<\/span> <span class=\"p\">{<\/span>\r\n        <span class=\"nl\">\"source\"<\/span><span class=\"p\">:<\/span> <span class=\"s2\">\"\/tmp\/takaaki.yayoi@databricks.com\/demo\/cdc_raw\"<\/span>\r\n    
<span class=\"p\">},<\/span>\r\n    <span class=\"nl\">\"target\"<\/span><span class=\"p\">:<\/span> <span class=\"s2\">\"cdc_data_taka\"<\/span>\r\n<span class=\"p\">}<\/span>\r\n<\/code><\/pre>\n<h2>\u6267\u884c\u6d41\u6c34\u7ebf<\/h2>\n<div><img decoding=\"async\" class=\"post-images\" title=\"\" src=\"https:\/\/cdn.silicloud.com\/blog-img\/blog\/img\/657d7d1f913a08637a69ed74\/73-0.png\" alt=\"Screen Shot 2022-12-15 at 18.53.37.png\" \/><\/div>\n<div><img decoding=\"async\" class=\"post-images\" title=\"\" src=\"https:\/\/cdn.silicloud.com\/blog-img\/blog\/img\/657d7d1f913a08637a69ed74\/74-0.png\" alt=\"Screen Shot 2022-12-15 at 18.54.02.png\" \/><\/div>\n<p>\u6267\u884c\u6d41\u6c34\u7ebf\u540e\uff0c\u5c06\u5206\u6790\u4e8b\u4ef6\u65e5\u5fd7\u4ee5\u76d1\u63a7\u4e8b\u4ef6\u65e5\u5fd7\u548c\u8840\u7edf\u6570\u636e\u3002<\/p>\n<h1>\u4e8b\u4ef6\u65e5\u5fd7\u7684\u5206\u6790<\/h1>\n<p>&nbsp;<\/p>\n<p>\u6bcf\u4e2a DLT \u7ba1\u9053\u90fd\u4e0e\u5176\u5728\u7ba1\u9053\u4e2d\u5b9a\u4e49\u7684\u5b58\u50a8\u4f4d\u7f6e\u62e5\u6709\u81ea\u5df1\u7684\u4e8b\u4ef6\u8868\u3002\u901a\u8fc7\u8fd9\u4e2a\u8868\uff0c\u53ef\u4ee5\u786e\u8ba4\u6b63\u5728\u53d1\u751f\u7684\u60c5\u51b5\uff0c\u5e76\u68c0\u67e5\u901a\u8fc7\u7ba1\u9053\u7684\u6570\u636e\u8d28\u91cf\u3002<\/p>\n<div><img decoding=\"async\" class=\"post-images\" title=\"\" src=\"https:\/\/cdn.silicloud.com\/blog-img\/blog\/img\/657d7d1f913a08637a69ed74\/79-0.png\" alt=\"undefined\" \/><\/div>\n<h2>\u5b89\u88c5<\/h2>\n<pre class=\"post-pre\"><code><span class=\"o\">%<\/span><span class=\"k\">sql<\/span> \r\n<span class=\"c1\">-- \u9069\u5b9c\u30c7\u30fc\u30bf\u30d9\u30fc\u30b9\u3092\u6307\u5b9a\u3057\u3066\u304f\u3060\u3055\u3044<\/span>\r\n<span class=\"k\">CREATE<\/span> <span class=\"k\">TABLE<\/span> <span class=\"n\">IF<\/span> <span class=\"k\">NOT<\/span> <span class=\"k\">EXISTS<\/span> <span class=\"n\">cdc_data_taka<\/span><span class=\"p\">.<\/span><span class=\"n\">demo_cdc_dlt_system_event_log_raw<\/span> <span 
class=\"k\">using<\/span> <span class=\"n\">delta<\/span> <span class=\"k\">LOCATION<\/span> <span class=\"s1\">'$storage_path\/system\/events'<\/span><span class=\"p\">;<\/span>\r\n<span class=\"k\">select<\/span> <span class=\"o\">*<\/span> <span class=\"k\">from<\/span> <span class=\"n\">cdc_data_taka<\/span><span class=\"p\">.<\/span><span class=\"n\">demo_cdc_dlt_system_event_log_raw<\/span><span class=\"p\">;<\/span>\r\n<\/code><\/pre>\n<div><img decoding=\"async\" class=\"post-images\" title=\"\" src=\"https:\/\/cdn.silicloud.com\/blog-img\/blog\/img\/657d7d1f913a08637a69ed74\/82-0.png\" alt=\"Screen Shot 2022-12-15 at 18.56.53.png\" \/><\/div>\n<h2>Delta Live Tables\u7684\u671f\u671b\u5206\u6790<\/h2>\n<p>\u901a\u8fc7Delta Live Tables\uff0c\u53ef\u4ee5\u901a\u8fc7\u671f\u671b\u8ddf\u8e2a\u6570\u636e\u8d28\u91cf\u3002\u8fd9\u4e9b\u671f\u671b\u4f1a\u4e0eDLT\u7684\u65e5\u5fd7\u4e8b\u4ef6\u4e00\u8d77\u5b58\u50a8\u4e3a\u6280\u672f\u8868\u3002\u4e3a\u4e86\u5206\u6790\u8fd9\u4e9b\u4fe1\u606f\uff0c\u53ef\u4ee5\u7b80\u5355\u5730\u521b\u5efa\u4e00\u4e2a\u89c6\u56fe\u3002<\/p>\n<h2>1 &#8211; \u4e8b\u4ef6\u65e5\u5fd7\u7684\u5206\u6790<\/h2>\n<p>\u5728\u8be6\u60c5\u680f\u4e2d\u5305\u542b\u4e86\u53d1\u9001\u5230\u4e8b\u4ef6\u65e5\u5fd7\u7684\u6bcf\u4e2a\u4e8b\u4ef6\u7684\u5143\u6570\u636e\u3002\u6839\u636e\u4e8b\u4ef6\u7c7b\u578b\uff0c\u5b57\u6bb5\u4f1a\u6709\u6240\u4e0d\u540c\u3002\u4ee5\u4e0b\u662f\u884c\u7a0b\u75b2\u52b3\u7684\u793a\u4f8b\u3002<\/p>\n<div>\n<div 
class=\"post-table\">\u30a4\u30d9\u30f3\u30c8\u306e\u30bf\u30a4\u30d7\u6319\u52d5<code>user_action<\/code>\u30d1\u30a4\u30d7\u30e9\u30a4\u30f3\u306e\u4f5c\u6210\u306e\u3088\u3046\u306a\u30a2\u30af\u30b7\u30e7\u30f3\u304c\u884c\u308f\u308c\u305f\u969b\u306b\u751f\u3058\u308b\u30a4\u30d9\u30f3\u30c8<code>flow_definition<\/code>\u30d1\u30a4\u30d7\u30e9\u30a4\u30f3\u306e\u30c7\u30d7\u30ed\u30a4\u30e1\u30f3\u30c8\u3084\u30a2\u30c3\u30d7\u30c7\u30fc\u30c8\u304c\u884c\u308f\u308c\u305f\u969b\u306b\u751f\u3058\u308b\u30a4\u30d9\u30f3\u30c8\u3067\u3042\u308a\u3001\u30ea\u30cd\u30fc\u30b8\u30e5\u3001\u30b9\u30ad\u30fc\u30de\u3001\u5b9f\u884c\u8a08\u753b\u60c5\u5831\u3092\u6301\u3061\u307e\u3059<code>output_dataset<\/code> \u3068 <code>input_datasets<\/code>\u51fa\u529b\u306e\u30c6\u30fc\u30d6\u30eb\/\u30d3\u30e5\u30fc\u3001\u524d\u6bb5\u306e\u30c6\u30fc\u30d6\u30eb\/\u30d3\u30e5\u30fc<code>flow_type<\/code>\u30b3\u30f3\u30d7\u30ea\u30fc\u30c8\u30d5\u30ed\u30fc\u304b\u8ffd\u52a0\u306e\u30d5\u30ed\u30fc\u304b<code>explain_text<\/code>Spark\u306e\u5b9f\u884c\u8a08\u753b<code>flow_progress<\/code>\u30c7\u30fc\u30bf\u30d5\u30ed\u30fc\u304c\u30c7\u30fc\u30bf\u30d0\u30c3\u30c1\u306e\u51e6\u7406\u3092\u958b\u59cb\u3042\u308b\u3044\u306f\u5b8c\u4e86\u3057\u305f\u969b\u306b\u751f\u3058\u308b\u30a4\u30d9\u30f3\u30c8<code>metrics<\/code>\u73fe\u5728\u306f<code>num_output_rows<\/code>\u304c\u542b\u307e\u308c\u3066\u3044\u307e\u3059<code>data_quality<\/code> (<code>dropped_records<\/code>), (<code>expectations<\/code>: <code>name<\/code>, <code>dataset<\/code>, <code>passed_records<\/code>, <code>failed_records<\/code>)\u3053\u306e\u7279\u5b9a\u306e\u30c7\u30fc\u30bf\u30bb\u30c3\u30c8\u306b\u5bfe\u3059\u308b\u30c7\u30fc\u30bf\u54c1\u8cea\u30eb\u30fc\u30eb\u306e\u7d50\u679c\u306e\u914d\u5217\u304c\u542b\u307e\u308c\u307e\u3059 * <code>expectations<\/code><\/div>\n<\/div>\n<h3>\u4e8b\u4ef6\u65e5\u5fd7 &#8211; 
\u6839\u636e\u65f6\u95f4\u6233\u6392\u5e8f\u7684\u539f\u59cb\u4e8b\u4ef6<\/h3>\n<pre class=\"post-pre\"><code><span class=\"c1\">-- \u9069\u5b9c\u30c7\u30fc\u30bf\u30d9\u30fc\u30b9\u3092\u6307\u5b9a\u3057\u3066\u304f\u3060\u3055\u3044<\/span>\r\n<span class=\"k\">SELECT<\/span> \r\n       <span class=\"n\">id<\/span><span class=\"p\">,<\/span>\r\n       <span class=\"nb\">timestamp<\/span><span class=\"p\">,<\/span>\r\n       <span class=\"n\">sequence<\/span><span class=\"p\">,<\/span>\r\n       <span class=\"n\">event_type<\/span><span class=\"p\">,<\/span>\r\n       <span class=\"n\">message<\/span><span class=\"p\">,<\/span>\r\n       <span class=\"k\">level<\/span><span class=\"p\">,<\/span> \r\n       <span class=\"n\">details<\/span>\r\n  <span class=\"k\">FROM<\/span> <span class=\"n\">cdc_data_taka<\/span><span class=\"p\">.<\/span><span class=\"n\">demo_cdc_dlt_system_event_log_raw<\/span>\r\n <span class=\"k\">ORDER<\/span> <span class=\"k\">BY<\/span> <span class=\"nb\">timestamp<\/span> <span class=\"k\">ASC<\/span>\r\n<\/code><\/pre>\n<div><img decoding=\"async\" class=\"post-images\" title=\"\" src=\"https:\/\/cdn.silicloud.com\/blog-img\/blog\/img\/657d7d1f913a08637a69ed74\/90-0.png\" alt=\"Screen Shot 2022-12-15 at 18.58.20.png\" \/><\/div>\n<h2>2- DLT\u7684\u884d\u751f\u7269<\/h2>\n<pre class=\"post-pre\"><code><span class=\"o\">%<\/span><span class=\"k\">sql<\/span>\r\n<span class=\"c1\">-- \u30bf\u30a4\u30d7\u3068\u6700\u65b0\u306e\u5909\u66f4\u3054\u3068\u306b\u51fa\u529b\u30c7\u30fc\u30bf\u30bb\u30c3\u30c8\u3092\u4e00\u89a7\u3057\u307e\u3059<\/span>\r\n<span class=\"c1\">-- \u9069\u5b9c\u30c7\u30fc\u30bf\u30d9\u30fc\u30b9\u3092\u6307\u5b9a\u3057\u3066\u304f\u3060\u3055\u3044<\/span>\r\n<span class=\"k\">create<\/span> <span class=\"k\">or<\/span> <span class=\"k\">replace<\/span> <span class=\"k\">temp<\/span> <span class=\"k\">view<\/span> <span class=\"n\">cdc_dlt_expectations<\/span> <span class=\"k\">as<\/span> <span 
class=\"p\">(<\/span>\r\n  <span class=\"k\">SELECT<\/span> \r\n    <span class=\"n\">id<\/span><span class=\"p\">,<\/span>\r\n    <span class=\"nb\">timestamp<\/span><span class=\"p\">,<\/span>\r\n    <span class=\"n\">details<\/span><span class=\"p\">:<\/span><span class=\"n\">flow_progress<\/span><span class=\"p\">.<\/span><span class=\"n\">metrics<\/span><span class=\"p\">.<\/span><span class=\"n\">num_output_rows<\/span> <span class=\"k\">as<\/span> <span class=\"n\">output_records<\/span><span class=\"p\">,<\/span>\r\n    <span class=\"n\">details<\/span><span class=\"p\">:<\/span><span class=\"n\">flow_progress<\/span><span class=\"p\">.<\/span><span class=\"n\">data_quality<\/span><span class=\"p\">.<\/span><span class=\"n\">dropped_records<\/span><span class=\"p\">,<\/span>\r\n    <span class=\"n\">details<\/span><span class=\"p\">:<\/span><span class=\"n\">flow_progress<\/span><span class=\"p\">.<\/span><span class=\"n\">status<\/span> <span class=\"k\">as<\/span> <span class=\"n\">status_update<\/span><span class=\"p\">,<\/span>\r\n    <span class=\"n\">explode<\/span><span class=\"p\">(<\/span><span class=\"n\">from_json<\/span><span class=\"p\">(<\/span><span class=\"n\">details<\/span><span class=\"p\">:<\/span><span class=\"n\">flow_progress<\/span><span class=\"p\">.<\/span><span class=\"n\">data_quality<\/span><span class=\"p\">.<\/span><span class=\"n\">expectations<\/span>\r\n             <span class=\"p\">,<\/span><span class=\"s1\">'array&lt;struct&lt;dataset: string, failed_records: bigint, name: string, passed_records: bigint&gt;&gt;'<\/span><span class=\"p\">))<\/span> <span class=\"n\">expectations<\/span>\r\n  <span class=\"k\">FROM<\/span> <span class=\"n\">cdc_data_taka<\/span><span class=\"p\">.<\/span><span class=\"n\">demo_cdc_dlt_system_event_log_raw<\/span>\r\n  <span class=\"k\">where<\/span> <span class=\"n\">details<\/span><span class=\"p\">:<\/span><span class=\"n\">flow_progress<\/span><span class=\"p\">.<\/span><span 
class=\"n\">data_quality<\/span><span class=\"p\">.<\/span><span class=\"n\">expectations<\/span> <span class=\"k\">is<\/span> <span class=\"k\">not<\/span> <span class=\"k\">null<\/span>\r\n  <span class=\"k\">ORDER<\/span> <span class=\"k\">BY<\/span> <span class=\"nb\">timestamp<\/span><span class=\"p\">);<\/span>\r\n\r\n<span class=\"k\">select<\/span> <span class=\"o\">*<\/span> <span class=\"k\">from<\/span> <span class=\"n\">cdc_dlt_expectations<\/span>\r\n<\/code><\/pre>\n<div><img decoding=\"async\" class=\"post-images\" title=\"\" src=\"https:\/\/cdn.silicloud.com\/blog-img\/blog\/img\/657d7d1f913a08637a69ed74\/93-0.png\" alt=\"Screen Shot 2022-12-15 at 18.58.52.png\" \/><\/div>\n<pre class=\"post-pre\"><code><span class=\"o\">%<\/span><span class=\"k\">sql<\/span>\r\n<span class=\"c1\">----------------------------------------------------------------------------------------<\/span>\r\n<span class=\"c1\">-- \u30ea\u30cd\u30fc\u30b8\u30e5<\/span>\r\n<span class=\"c1\">----------------------------------------------------------------------------------------<\/span>\r\n<span class=\"k\">SELECT<\/span> <span class=\"n\">max_timestamp<\/span><span class=\"p\">,<\/span>\r\n       <span class=\"n\">details<\/span><span class=\"p\">:<\/span><span class=\"n\">flow_definition<\/span><span class=\"p\">.<\/span><span class=\"n\">output_dataset<\/span><span class=\"p\">,<\/span>\r\n       <span class=\"n\">details<\/span><span class=\"p\">:<\/span><span class=\"n\">flow_definition<\/span><span class=\"p\">.<\/span><span class=\"n\">input_datasets<\/span><span class=\"p\">,<\/span>\r\n       <span class=\"n\">details<\/span><span class=\"p\">:<\/span><span class=\"n\">flow_definition<\/span><span class=\"p\">.<\/span><span class=\"n\">flow_type<\/span><span class=\"p\">,<\/span>\r\n       <span class=\"n\">details<\/span><span class=\"p\">:<\/span><span class=\"n\">flow_definition<\/span><span class=\"p\">.<\/span><span class=\"k\">schema<\/span><span 
class=\"p\">,<\/span>\r\n       <span class=\"n\">details<\/span><span class=\"p\">:<\/span><span class=\"n\">flow_definition<\/span><span class=\"p\">.<\/span><span class=\"n\">explain_text<\/span><span class=\"p\">,<\/span>\r\n       <span class=\"n\">details<\/span><span class=\"p\">:<\/span><span class=\"n\">flow_definition<\/span>\r\n  <span class=\"k\">FROM<\/span> <span class=\"n\">cdc_data_taka<\/span><span class=\"p\">.<\/span><span class=\"n\">demo_cdc_dlt_system_event_log_raw<\/span> <span class=\"n\">e<\/span>\r\n <span class=\"k\">INNER<\/span> <span class=\"k\">JOIN<\/span> <span class=\"p\">(<\/span>\r\n              <span class=\"k\">SELECT<\/span> <span class=\"n\">details<\/span><span class=\"p\">:<\/span><span class=\"n\">flow_definition<\/span><span class=\"p\">.<\/span><span class=\"n\">output_dataset<\/span> <span class=\"n\">output_dataset<\/span><span class=\"p\">,<\/span>\r\n                     <span class=\"k\">MAX<\/span><span class=\"p\">(<\/span><span class=\"nb\">timestamp<\/span><span class=\"p\">)<\/span> <span class=\"n\">max_timestamp<\/span>\r\n                <span class=\"k\">FROM<\/span> <span class=\"n\">cdc_data_taka<\/span><span class=\"p\">.<\/span><span class=\"n\">demo_cdc_dlt_system_event_log_raw<\/span>\r\n               <span class=\"k\">WHERE<\/span> <span class=\"n\">details<\/span><span class=\"p\">:<\/span><span class=\"n\">flow_definition<\/span><span class=\"p\">.<\/span><span class=\"n\">output_dataset<\/span> <span class=\"k\">IS<\/span> <span class=\"k\">NOT<\/span> <span class=\"k\">NULL<\/span>\r\n               <span class=\"k\">GROUP<\/span> <span class=\"k\">BY<\/span> <span class=\"n\">details<\/span><span class=\"p\">:<\/span><span class=\"n\">flow_definition<\/span><span class=\"p\">.<\/span><span class=\"n\">output_dataset<\/span>\r\n            <span class=\"p\">)<\/span> <span class=\"n\">m<\/span>\r\n  <span class=\"k\">WHERE<\/span> <span class=\"n\">e<\/span><span class=\"p\">.<\/span><span 
class=\"nb\">timestamp<\/span> <span class=\"o\">=<\/span> <span class=\"n\">m<\/span><span class=\"p\">.<\/span><span class=\"n\">max_timestamp<\/span>\r\n    <span class=\"k\">AND<\/span> <span class=\"n\">e<\/span><span class=\"p\">.<\/span><span class=\"n\">details<\/span><span class=\"p\">:<\/span><span class=\"n\">flow_definition<\/span><span class=\"p\">.<\/span><span class=\"n\">output_dataset<\/span> <span class=\"o\">=<\/span> <span class=\"n\">m<\/span><span class=\"p\">.<\/span><span class=\"n\">output_dataset<\/span>\r\n<span class=\"c1\">--    AND e.details:flow_definition IS NOT NULL<\/span>\r\n <span class=\"k\">ORDER<\/span> <span class=\"k\">BY<\/span> <span class=\"n\">e<\/span><span class=\"p\">.<\/span><span class=\"n\">details<\/span><span class=\"p\">:<\/span><span class=\"n\">flow_definition<\/span><span class=\"p\">.<\/span><span class=\"n\">output_dataset<\/span>\r\n<span class=\"p\">;<\/span>\r\n<\/code><\/pre>\n<div><img decoding=\"async\" class=\"post-images\" title=\"\" src=\"https:\/\/cdn.silicloud.com\/blog-img\/blog\/img\/657d7d1f913a08637a69ed74\/95-0.png\" alt=\"Screen Shot 2022-12-15 at 18.59.18.png\" \/><\/div>\n<h2>3 &#8211; \u8cea\u91cf\u6307\u6a19<\/h2>\n<pre class=\"post-pre\"><code><span class=\"o\">%<\/span><span class=\"k\">sql<\/span> \r\n<span class=\"k\">select<\/span> <span class=\"k\">sum<\/span><span class=\"p\">(<\/span><span class=\"n\">expectations<\/span><span class=\"p\">.<\/span><span class=\"n\">failed_records<\/span><span class=\"p\">)<\/span> <span class=\"k\">as<\/span> <span class=\"n\">failed_records<\/span><span class=\"p\">,<\/span> \r\n<span class=\"k\">sum<\/span><span class=\"p\">(<\/span><span class=\"n\">expectations<\/span><span class=\"p\">.<\/span><span class=\"n\">passed_records<\/span><span class=\"p\">)<\/span> <span class=\"k\">as<\/span> <span class=\"n\">passed_records<\/span><span class=\"p\">,<\/span> \r\n<span class=\"n\">expectations<\/span><span class=\"p\">.<\/span><span 
class=\"n\">name<\/span> \r\n<span class=\"k\">from<\/span> <span class=\"n\">cdc_dlt_expectations<\/span> \r\n<span class=\"k\">group<\/span> <span class=\"k\">by<\/span> <span class=\"n\">expectations<\/span><span class=\"p\">.<\/span><span class=\"n\">name<\/span>\r\n<\/code><\/pre>\n<div><img decoding=\"async\" class=\"post-images\" title=\"\" src=\"https:\/\/cdn.silicloud.com\/blog-img\/blog\/img\/657d7d1f913a08637a69ed74\/98-0.png\" alt=\"Screen Shot 2022-12-15 at 19.00.06.png\" \/><\/div>\n<h2>4 &#8211; \u68c0\u67e5\u5546\u4e1a\u6c47\u603b\u4fe1\u606f<\/h2>\n<pre class=\"post-pre\"><code><span class=\"o\">%<\/span><span class=\"n\">python<\/span> \r\n<span class=\"kn\">import<\/span> <span class=\"n\">plotly.express<\/span> <span class=\"k\">as<\/span> <span class=\"n\">px<\/span>\r\n<span class=\"n\">expectations_metrics<\/span> <span class=\"o\">=<\/span> <span class=\"n\">spark<\/span><span class=\"p\">.<\/span><span class=\"nf\">sql<\/span><span class=\"p\">(<\/span><span class=\"sh\">\"\"\"<\/span><span class=\"s\">select sum(expectations.failed_records) as failed_records, \r\n                                 sum(expectations.passed_records) as passed_records, \r\n                                 expectations.name \r\n                                 from cdc_dlt_expectations\r\n                                 group by expectations.name<\/span><span class=\"sh\">\"\"\"<\/span><span class=\"p\">).<\/span><span class=\"nf\">toPandas<\/span><span class=\"p\">()<\/span>\r\n<span class=\"n\">px<\/span><span class=\"p\">.<\/span><span class=\"nf\">bar<\/span><span class=\"p\">(<\/span><span class=\"n\">expectations_metrics<\/span><span class=\"p\">,<\/span> <span class=\"n\">x<\/span><span class=\"o\">=<\/span><span class=\"sh\">\"<\/span><span class=\"s\">name<\/span><span class=\"sh\">\"<\/span><span class=\"p\">,<\/span> <span class=\"n\">y<\/span><span class=\"o\">=<\/span><span class=\"p\">[<\/span><span class=\"sh\">\"<\/span><span 
class=\"s\">passed_records<\/span><span class=\"sh\">\"<\/span><span class=\"p\">,<\/span> <span class=\"sh\">\"<\/span><span class=\"s\">failed_records<\/span><span class=\"sh\">\"<\/span><span class=\"p\">],<\/span> <span class=\"n\">title<\/span><span class=\"o\">=<\/span><span class=\"sh\">\"<\/span><span class=\"s\">DLT expectations metrics<\/span><span class=\"sh\">\"<\/span><span class=\"p\">)<\/span>\r\n<\/code><\/pre>\n<div><img decoding=\"async\" class=\"post-images\" title=\"\" src=\"https:\/\/cdn.silicloud.com\/blog-img\/blog\/img\/657d7d1f913a08637a69ed74\/101-0.png\" alt=\"Screen Shot 2022-12-15 at 19.00.52.png\" \/><\/div>\n<h1>\u603b\u7ed3<\/h1>\n<p>\u901a\u8fc7\u4f7f\u7528Delta Live Tables\uff0c\u60a8\u53ef\u4ee5\u8f7b\u677e\u5229\u7528\u53d8\u66f4\u6570\u636e\u6355\u83b7\uff08Change Data Capture\uff09\u5c06\u53ea\u4f20\u64ad\u66f4\u6539\u7684\u6570\u636e\u4f20\u9012\u5230\u4e0b\u4e00\u4e2a\u8868\u3002\u901a\u8fc7\u5229\u7528\u4e8b\u4ef6\u65e5\u5fd7\u6570\u636e\uff0c\u8fd8\u53ef\u4ee5\u76d1\u63a7\u7ba1\u9053\u5904\u7406\u7684\u72b6\u6001\uff0c\u4ece\u800c\u6784\u5efa\u80fd\u591f\u540c\u65f6\u4fdd\u6301\u6570\u636e\u7ba1\u9053\u8d28\u91cf\u5e76\u6267\u884c\u591a\u79cd\u5904\u7406\u7684\u7ba1\u9053\u3002\u8bf7\u52a1\u5fc5\u5c1d\u8bd5\u4f7f\u7528DLT\u8fdb\u884cCDC\uff01<\/p>\n<h3>Databricks \u514d\u8d39\u8bd5\u7528<\/h3>\n<p>\u8fbe\u767e\u514b\u514d\u8d39\u8bd5\u7528<\/p>\n","protected":false},"excerpt":{"rendered":"<p>\u6211\u4f1a\u6d4f\u89c8\u8fd9\u91cc\u53d1\u5e03\u7684\u7b14\u8bb0\u672c\u3002\u751f\u6210\u53d1\u751f\u53d8\u5316\u7684\u6570\u636e\uff0c\u5e76\u4f7f\u7528Delta Live Tables\uff08DLT\uff09\u7684\u66f4\u6539\u6570\u636e\u6355\u83b7 
[&hellip;]<\/p>\n","protected":false},"author":8,"featured_media":0,"comment_status":"closed","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[1],"tags":[],"class_list":["post-47193","post","type-post","status-publish","format-standard","hentry","category-uncategorized"],"yoast_head":"<!-- This site is optimized with the Yoast SEO Premium plugin v21.5 (Yoast SEO v21.5) - https:\/\/yoast.com\/wordpress\/plugins\/seo\/ -->\n<title>\u5c1d\u8bd5\u4f7f\u7528Delta Live Tables\u7684\u53d8\u66f4\u6570\u636e\u6355\u83b7\uff08CDC\uff09 - Blog - Silicon Cloud<\/title>\n<meta name=\"robots\" content=\"index, follow, max-snippet:-1, max-image-preview:large, max-video-preview:-1\" \/>\n<link rel=\"canonical\" href=\"https:\/\/www.silicloud.com\/zh\/blog\/\u5c1d\u8bd5\u4f7f\u7528delta-live-tables\u7684\u53d8\u66f4\u6570\u636e\u6355\u83b7\uff08cdc\uff09\u3002\/\" \/>\n<meta property=\"og:locale\" content=\"zh_CN\" \/>\n<meta property=\"og:type\" content=\"article\" \/>\n<meta property=\"og:title\" content=\"\u5c1d\u8bd5\u4f7f\u7528Delta Live Tables\u7684\u53d8\u66f4\u6570\u636e\u6355\u83b7\uff08CDC\uff09\" \/>\n<meta property=\"og:description\" content=\"\u6211\u4f1a\u6d4f\u89c8\u8fd9\u91cc\u53d1\u5e03\u7684\u7b14\u8bb0\u672c\u3002\u751f\u6210\u53d1\u751f\u53d8\u5316\u7684\u6570\u636e\uff0c\u5e76\u4f7f\u7528Delta Live Tables\uff08DLT\uff09\u7684\u66f4\u6539\u6570\u636e\u6355\u83b7 [&hellip;]\" \/>\n<meta property=\"og:url\" content=\"https:\/\/www.silicloud.com\/zh\/blog\/\u5c1d\u8bd5\u4f7f\u7528delta-live-tables\u7684\u53d8\u66f4\u6570\u636e\u6355\u83b7\uff08cdc\uff09\u3002\/\" \/>\n<meta property=\"og:site_name\" content=\"Blog - Silicon Cloud\" \/>\n<meta property=\"article:published_time\" content=\"2024-03-08T17:19:03+00:00\" \/>\n<meta property=\"article:modified_time\" content=\"2024-04-28T17:49:20+00:00\" \/>\n<meta property=\"og:image\" 
content=\"https:\/\/cdn.silicloud.com\/blog-img\/blog\/img\/657d7d1f913a08637a69ed74\/14-0.png\" \/>\n<meta name=\"author\" content=\"\u96c5, \u609f\" \/>\n<meta name=\"twitter:card\" content=\"summary_large_image\" \/>\n<meta name=\"twitter:label1\" content=\"\u4f5c\u8005\" \/>\n\t<meta name=\"twitter:data1\" content=\"\u96c5, \u609f\" \/>\n\t<meta name=\"twitter:label2\" content=\"\u9884\u8ba1\u9605\u8bfb\u65f6\u95f4\" \/>\n\t<meta name=\"twitter:data2\" content=\"5 \u5206\" \/>\n<script type=\"application\/ld+json\" class=\"yoast-schema-graph\">{\"@context\":\"https:\/\/schema.org\",\"@graph\":[{\"@type\":\"WebPage\",\"@id\":\"https:\/\/www.silicloud.com\/zh\/blog\/%e5%b0%9d%e8%af%95%e4%bd%bf%e7%94%a8delta-live-tables%e7%9a%84%e5%8f%98%e6%9b%b4%e6%95%b0%e6%8d%ae%e6%8d%95%e8%8e%b7%ef%bc%88cdc%ef%bc%89%e3%80%82\/\",\"url\":\"https:\/\/www.silicloud.com\/zh\/blog\/%e5%b0%9d%e8%af%95%e4%bd%bf%e7%94%a8delta-live-tables%e7%9a%84%e5%8f%98%e6%9b%b4%e6%95%b0%e6%8d%ae%e6%8d%95%e8%8e%b7%ef%bc%88cdc%ef%bc%89%e3%80%82\/\",\"name\":\"\u5c1d\u8bd5\u4f7f\u7528Delta Live Tables\u7684\u53d8\u66f4\u6570\u636e\u6355\u83b7\uff08CDC\uff09 - Blog - Silicon 
Cloud\",\"isPartOf\":{\"@id\":\"https:\/\/www.silicloud.com\/zh\/blog\/#website\"},\"datePublished\":\"2024-03-08T17:19:03+00:00\",\"dateModified\":\"2024-04-28T17:49:20+00:00\",\"author\":{\"@id\":\"https:\/\/www.silicloud.com\/zh\/blog\/#\/schema\/person\/f044a4b7fa4ee2701702942002419ca6\"},\"breadcrumb\":{\"@id\":\"https:\/\/www.silicloud.com\/zh\/blog\/%e5%b0%9d%e8%af%95%e4%bd%bf%e7%94%a8delta-live-tables%e7%9a%84%e5%8f%98%e6%9b%b4%e6%95%b0%e6%8d%ae%e6%8d%95%e8%8e%b7%ef%bc%88cdc%ef%bc%89%e3%80%82\/#breadcrumb\"},\"inLanguage\":\"zh-Hans\",\"potentialAction\":[{\"@type\":\"ReadAction\",\"target\":[\"https:\/\/www.silicloud.com\/zh\/blog\/%e5%b0%9d%e8%af%95%e4%bd%bf%e7%94%a8delta-live-tables%e7%9a%84%e5%8f%98%e6%9b%b4%e6%95%b0%e6%8d%ae%e6%8d%95%e8%8e%b7%ef%bc%88cdc%ef%bc%89%e3%80%82\/\"]}]},{\"@type\":\"BreadcrumbList\",\"@id\":\"https:\/\/www.silicloud.com\/zh\/blog\/%e5%b0%9d%e8%af%95%e4%bd%bf%e7%94%a8delta-live-tables%e7%9a%84%e5%8f%98%e6%9b%b4%e6%95%b0%e6%8d%ae%e6%8d%95%e8%8e%b7%ef%bc%88cdc%ef%bc%89%e3%80%82\/#breadcrumb\",\"itemListElement\":[{\"@type\":\"ListItem\",\"position\":1,\"name\":\"\u9996\u9875\",\"item\":\"https:\/\/www.silicloud.com\/zh\/blog\/\"},{\"@type\":\"ListItem\",\"position\":2,\"name\":\"\u5c1d\u8bd5\u4f7f\u7528Delta Live Tables\u7684\u53d8\u66f4\u6570\u636e\u6355\u83b7\uff08CDC\uff09\"}]},{\"@type\":\"WebSite\",\"@id\":\"https:\/\/www.silicloud.com\/zh\/blog\/#website\",\"url\":\"https:\/\/www.silicloud.com\/zh\/blog\/\",\"name\":\"Blog - Silicon Cloud\",\"description\":\"\",\"inLanguage\":\"zh-Hans\"},{\"@type\":\"Person\",\"@id\":\"https:\/\/www.silicloud.com\/zh\/blog\/#\/schema\/person\/f044a4b7fa4ee2701702942002419ca6\",\"name\":\"\u96c5, 
\u609f\",\"image\":{\"@type\":\"ImageObject\",\"inLanguage\":\"zh-Hans\",\"@id\":\"https:\/\/www.silicloud.com\/zh\/blog\/#\/schema\/person\/image\/\",\"url\":\"https:\/\/secure.gravatar.com\/avatar\/e71a913e914f1aad1efc391f92084294bac54bc782acd289638580134cf667a6?s=96&d=mm&r=g\",\"contentUrl\":\"https:\/\/secure.gravatar.com\/avatar\/e71a913e914f1aad1efc391f92084294bac54bc782acd289638580134cf667a6?s=96&d=mm&r=g\",\"caption\":\"\u96c5, \u609f\"},\"url\":\"https:\/\/www.silicloud.com\/zh\/blog\/author\/yawu\/\"},{\"@type\":\"ImageObject\",\"inLanguage\":\"zh-Hans\",\"@id\":\"https:\/\/www.silicloud.com\/zh\/blog\/%e5%b0%9d%e8%af%95%e4%bd%bf%e7%94%a8delta-live-tables%e7%9a%84%e5%8f%98%e6%9b%b4%e6%95%b0%e6%8d%ae%e6%8d%95%e8%8e%b7%ef%bc%88cdc%ef%bc%89%e3%80%82\/#local-main-organization-logo\",\"url\":\"\",\"contentUrl\":\"\",\"caption\":\"Blog - Silicon Cloud\"}]}<\/script>\n<!-- \/ Yoast SEO Premium plugin. -->","yoast_head_json":{"title":"\u5c1d\u8bd5\u4f7f\u7528Delta Live Tables\u7684\u53d8\u66f4\u6570\u636e\u6355\u83b7\uff08CDC\uff09 - Blog - Silicon Cloud","robots":{"index":"index","follow":"follow","max-snippet":"max-snippet:-1","max-image-preview":"max-image-preview:large","max-video-preview":"max-video-preview:-1"},"canonical":"https:\/\/www.silicloud.com\/zh\/blog\/\u5c1d\u8bd5\u4f7f\u7528delta-live-tables\u7684\u53d8\u66f4\u6570\u636e\u6355\u83b7\uff08cdc\uff09\u3002\/","og_locale":"zh_CN","og_type":"article","og_title":"\u5c1d\u8bd5\u4f7f\u7528Delta Live Tables\u7684\u53d8\u66f4\u6570\u636e\u6355\u83b7\uff08CDC\uff09","og_description":"\u6211\u4f1a\u6d4f\u89c8\u8fd9\u91cc\u53d1\u5e03\u7684\u7b14\u8bb0\u672c\u3002\u751f\u6210\u53d1\u751f\u53d8\u5316\u7684\u6570\u636e\uff0c\u5e76\u4f7f\u7528Delta Live Tables\uff08DLT\uff09\u7684\u66f4\u6539\u6570\u636e\u6355\u83b7 [&hellip;]","og_url":"https:\/\/www.silicloud.com\/zh\/blog\/\u5c1d\u8bd5\u4f7f\u7528delta-live-tables\u7684\u53d8\u66f4\u6570\u636e\u6355\u83b7\uff08cdc\uff09\u3002\/","og_site_name":"Blog - Silicon 
Cloud","article_published_time":"2024-03-08T17:19:03+00:00","article_modified_time":"2024-04-28T17:49:20+00:00","og_image":[{"url":"https:\/\/cdn.silicloud.com\/blog-img\/blog\/img\/657d7d1f913a08637a69ed74\/14-0.png"}],"author":"\u96c5, \u609f","twitter_card":"summary_large_image","twitter_misc":{"\u4f5c\u8005":"\u96c5, \u609f","\u9884\u8ba1\u9605\u8bfb\u65f6\u95f4":"5 \u5206"},"schema":{"@context":"https:\/\/schema.org","@graph":[{"@type":"WebPage","@id":"https:\/\/www.silicloud.com\/zh\/blog\/%e5%b0%9d%e8%af%95%e4%bd%bf%e7%94%a8delta-live-tables%e7%9a%84%e5%8f%98%e6%9b%b4%e6%95%b0%e6%8d%ae%e6%8d%95%e8%8e%b7%ef%bc%88cdc%ef%bc%89%e3%80%82\/","url":"https:\/\/www.silicloud.com\/zh\/blog\/%e5%b0%9d%e8%af%95%e4%bd%bf%e7%94%a8delta-live-tables%e7%9a%84%e5%8f%98%e6%9b%b4%e6%95%b0%e6%8d%ae%e6%8d%95%e8%8e%b7%ef%bc%88cdc%ef%bc%89%e3%80%82\/","name":"\u5c1d\u8bd5\u4f7f\u7528Delta Live Tables\u7684\u53d8\u66f4\u6570\u636e\u6355\u83b7\uff08CDC\uff09 - Blog - Silicon Cloud","isPartOf":{"@id":"https:\/\/www.silicloud.com\/zh\/blog\/#website"},"datePublished":"2024-03-08T17:19:03+00:00","dateModified":"2024-04-28T17:49:20+00:00","author":{"@id":"https:\/\/www.silicloud.com\/zh\/blog\/#\/schema\/person\/f044a4b7fa4ee2701702942002419ca6"},"breadcrumb":{"@id":"https:\/\/www.silicloud.com\/zh\/blog\/%e5%b0%9d%e8%af%95%e4%bd%bf%e7%94%a8delta-live-tables%e7%9a%84%e5%8f%98%e6%9b%b4%e6%95%b0%e6%8d%ae%e6%8d%95%e8%8e%b7%ef%bc%88cdc%ef%bc%89%e3%80%82\/#breadcrumb"},"inLanguage":"zh-Hans","potentialAction":[{"@type":"ReadAction","target":["https:\/\/www.silicloud.com\/zh\/blog\/%e5%b0%9d%e8%af%95%e4%bd%bf%e7%94%a8delta-live-tables%e7%9a%84%e5%8f%98%e6%9b%b4%e6%95%b0%e6%8d%ae%e6%8d%95%e8%8e%b7%ef%bc%88cdc%ef%bc%89%e3%80%82\/"]}]},{"@type":"BreadcrumbList","@id":"https:\/\/www.silicloud.com\/zh\/blog\/%e5%b0%9d%e8%af%95%e4%bd%bf%e7%94%a8delta-live-tables%e7%9a%84%e5%8f%98%e6%9b%b4%e6%95%b0%e6%8d%ae%e6%8d%95%e8%8e%b7%ef%bc%88cdc%ef%bc%89%e3%80%82\/#breadcrumb","itemListElement":[{"@type":"List
Item","position":1,"name":"\u9996\u9875","item":"https:\/\/www.silicloud.com\/zh\/blog\/"},{"@type":"ListItem","position":2,"name":"\u5c1d\u8bd5\u4f7f\u7528Delta Live Tables\u7684\u53d8\u66f4\u6570\u636e\u6355\u83b7\uff08CDC\uff09"}]},{"@type":"WebSite","@id":"https:\/\/www.silicloud.com\/zh\/blog\/#website","url":"https:\/\/www.silicloud.com\/zh\/blog\/","name":"Blog - Silicon Cloud","description":"","inLanguage":"zh-Hans"},{"@type":"Person","@id":"https:\/\/www.silicloud.com\/zh\/blog\/#\/schema\/person\/f044a4b7fa4ee2701702942002419ca6","name":"\u96c5, \u609f","image":{"@type":"ImageObject","inLanguage":"zh-Hans","@id":"https:\/\/www.silicloud.com\/zh\/blog\/#\/schema\/person\/image\/","url":"https:\/\/secure.gravatar.com\/avatar\/e71a913e914f1aad1efc391f92084294bac54bc782acd289638580134cf667a6?s=96&d=mm&r=g","contentUrl":"https:\/\/secure.gravatar.com\/avatar\/e71a913e914f1aad1efc391f92084294bac54bc782acd289638580134cf667a6?s=96&d=mm&r=g","caption":"\u96c5, \u609f"},"url":"https:\/\/www.silicloud.com\/zh\/blog\/author\/yawu\/"},{"@type":"ImageObject","inLanguage":"zh-Hans","@id":"https:\/\/www.silicloud.com\/zh\/blog\/%e5%b0%9d%e8%af%95%e4%bd%bf%e7%94%a8delta-live-tables%e7%9a%84%e5%8f%98%e6%9b%b4%e6%95%b0%e6%8d%ae%e6%8d%95%e8%8e%b7%ef%bc%88cdc%ef%bc%89%e3%80%82\/#local-main-organization-logo","url":"","contentUrl":"","caption":"Blog - Silicon 
Cloud"}]}},"_links":{"self":[{"href":"https:\/\/www.silicloud.com\/zh\/blog\/wp-json\/wp\/v2\/posts\/47193","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/www.silicloud.com\/zh\/blog\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/www.silicloud.com\/zh\/blog\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/www.silicloud.com\/zh\/blog\/wp-json\/wp\/v2\/users\/8"}],"replies":[{"embeddable":true,"href":"https:\/\/www.silicloud.com\/zh\/blog\/wp-json\/wp\/v2\/comments?post=47193"}],"version-history":[{"count":2,"href":"https:\/\/www.silicloud.com\/zh\/blog\/wp-json\/wp\/v2\/posts\/47193\/revisions"}],"predecessor-version":[{"id":72856,"href":"https:\/\/www.silicloud.com\/zh\/blog\/wp-json\/wp\/v2\/posts\/47193\/revisions\/72856"}],"wp:attachment":[{"href":"https:\/\/www.silicloud.com\/zh\/blog\/wp-json\/wp\/v2\/media?parent=47193"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/www.silicloud.com\/zh\/blog\/wp-json\/wp\/v2\/categories?post=47193"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/www.silicloud.com\/zh\/blog\/wp-json\/wp\/v2\/tags?post=47193"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}