{"id":47148,"date":"2023-04-11T07:13:38","date_gmt":"2024-01-15T16:37:15","guid":{"rendered":"https:\/\/www.silicloud.com\/zh\/blog\/%e5%9c%a8databricks%e4%b8%ad%e5%85%a8%e9%9d%a2%e8%bf%90%e7%94%a8%e7%bb%93%e6%9e%84%e5%8c%96%e6%b5%81%e5%a4%84%e7%90%86%e6%8a%80%e6%9c%af\/"},"modified":"2024-04-29T22:56:14","modified_gmt":"2024-04-29T14:56:14","slug":"%e5%9c%a8databricks%e4%b8%ad%e5%85%a8%e9%9d%a2%e8%bf%90%e7%94%a8%e7%bb%93%e6%9e%84%e5%8c%96%e6%b5%81%e5%a4%84%e7%90%86%e6%8a%80%e6%9c%af","status":"publish","type":"post","link":"https:\/\/www.silicloud.com\/zh\/blog\/%e5%9c%a8databricks%e4%b8%ad%e5%85%a8%e9%9d%a2%e8%bf%90%e7%94%a8%e7%bb%93%e6%9e%84%e5%8c%96%e6%b5%81%e5%a4%84%e7%90%86%e6%8a%80%e6%9c%af\/","title":{"rendered":"\u5728Databricks\u4e2d\u5168\u9762\u8fd0\u7528\u7ed3\u6784\u5316\u6d41\u5904\u7406\u6280\u672f"},"content":{"rendered":"<p>\u5728\u751f\u4ea7\u73af\u5883\u4e2d\u4f7f\u7528\u7684\u7ed3\u6784\u5316\u6d41\u5f0f\u5904\u7406 | Databricks \u5728 AWS \u4e0a [\u622a\u81f3 2022\/3\/21]<\/p>\n<div>\u9019\u672c\u66f8\u4e26\u975e\u539f\u8457\uff0c\u7121\u6cd5\u4fdd\u8b49\u5167\u5bb9\u6e96\u78ba\u3002\u5982\u9700\u8a73\u7d30\u5167\u5bb9\uff0c\u8acb\u53c3\u8003\u539f\u6587\u3002<\/div>\n<p>\u5c06\u7b14\u8bb0\u672c\u8fde\u63a5\u5230\u7fa4\u96c6\u5e76\u8fdb\u884c\u4ea4\u4e92\u5f0f\u6d41\u67e5\u8be2\u975e\u5e38\u65b9\u4fbf\u3002\u4f46\u662f\uff0c\u5728\u751f\u4ea7\u73af\u5883\u4e2d\u6267\u884c\u65f6\uff0c\u9700\u8981\u66f4\u9ad8\u7684\u53ef\u9760\u6027\u548c\u53ef\u7528\u6027\u3002\u672c\u4e66\u5c06\u8ba8\u8bba\u4f7f\u7528Databricks\u4f5c\u4e1a\u6784\u5efa\u66f4\u5177\u5bb9\u9519\u6027\u7684\u6d41\u5f0f\u5e94\u7528\u7684\u65b9\u6cd5\u3002<\/p>\n<h1>\u5b9a\u4e49\u6d41\u6570\u636e\u5904\u7406\u7684\u65f6\u673a<\/h1>\n<p>\u4e3a\u4e86\u5b9a\u4e49\u6d41\u6570\u636e\u5904\u7406\u7684\u65f6\u673a\uff0c\u53ef\u4ee5\u4f7f\u7528\u89e6\u53d1\u5668\u3002\u5982\u679c\u8bbe\u7f6e\u89e6\u53d1\u5668\u7684\u65f6\u95f4\u6bb5\u592a\u77ed\uff08\u5728\u51e0\u5341\u79d2\u5185\uff09\uff0c
\u7cfb\u7edf\u53ef\u80fd\u4f1a\u6267\u884c\u4e0d\u5fc5\u8981\u7684\u5904\u7406\u4ee5\u68c0\u67e5\u65b0\u6570\u636e\u7684\u5230\u8fbe\u3002\u4f5c\u4e3a\u6700\u4f73\u5b9e\u8df5\uff0c\u5efa\u8bae\u8c03\u6574\u89e6\u53d1\u5668\u4ee5\u6700\u5c0f\u5316\u6210\u672c\u3002<\/p>\n<h1>\u4ece\u67e5\u8be2\u5931\u8d25\u4e2d\u6062\u590d<\/h1>\n<p>\u5728\u751f\u4ea7\u7ea7\u522b\u7684\u6d41\u5a92\u4f53\u5e94\u7528\u7a0b\u5e8f\u4e2d\uff0c\u9700\u8981\u5177\u5907\u5065\u58ee\u7684\u9519\u8bef\u5904\u7406\u529f\u80fd\u3002\u5728\u7ed3\u6784\u5316\u6d41\u5a92\u4f53\u4e2d\uff0c\u542f\u7528\u5bf9\u6d41\u5a92\u4f53\u67e5\u8be2\u7684\u68c0\u67e5\u70b9\u53ef\u4ee5\u5728\u5931\u8d25\u540e\u91cd\u65b0\u542f\u52a8\u9519\u8bef\uff0c\u5e76\u4fdd\u8bc1\u6545\u969c\u5bb9\u5fcd\u6027\u548c\u6570\u636e\u4e00\u81f4\u6027\uff0c\u4ece\u53d1\u751f\u6545\u969c\u7684\u5730\u65b9\u7ee7\u7eed\u67e5\u8be2\u3002\u4e3a\u4e86\u589e\u5f3a\u67e5\u8be2\u7684\u6545\u969c\u5bb9\u5fcd\u6027\uff0c\u60a8\u5e94\u8be5\u542f\u7528\u68c0\u67e5\u70b9\uff0c\u5e76\u914d\u7f6eDatabricks\u4f5c\u4e1a\u4ee5\u5728\u9519\u8bef\u540e\u81ea\u52a8\u91cd\u65b0\u542f\u52a8\u67e5\u8be2\u3002<\/p>\n<h2>\u542f\u7528\u68c0\u67e5\u70b9<\/h2>\n<p>\u8981\u542f\u7528\u68c0\u67e5\u70b9\uff0c\u8bf7\u5728\u5f00\u59cb\u67e5\u8be2\u4e4b\u524d\u8bbe\u7f6echeckpointLocation\u9009\u9879\u4e3aDBFS\u6216\u4e91\u5b58\u50a8\u8def\u5f84\u3002<\/p>\n<pre class=\"post-pre\"><code><span class=\"nv\">streamingDataFrame<\/span><span class=\"o\">.<\/span><span class=\"py\">writeStream<\/span>\r\n  <span class=\"o\">.<\/span><span class=\"py\">format<\/span><span class=\"o\">(<\/span><span class=\"s\">\"parquet\"<\/span><span class=\"o\">)<\/span>\r\n  <span class=\"o\">.<\/span><span class=\"py\">option<\/span><span class=\"o\">(<\/span><span class=\"s\">\"path\"<\/span><span class=\"o\">,<\/span> <span class=\"s\">\"dbfs:\/\/outputPath\/\"<\/span><span class=\"o\">)<\/span>\r\n  <span class=\"o\">.<\/span><span class=\"py\">option<\/span><span 
class=\"o\">(<\/span><span class=\"s\">\"checkpointLocation\"<\/span><span class=\"o\">,<\/span> <span class=\"s\">\"dbfs:\/\/checkpointPath\"<\/span><span class=\"o\">)<\/span>\r\n  <span class=\"o\">.<\/span><span class=\"py\">start<\/span><span class=\"o\">()<\/span>\r\n<\/code><\/pre>\n<p>\u5728\u8fd9\u4e2a\u68c0\u67e5\u70b9\u7684\u4f4d\u7f6e\u4e0a\uff0c\u5c06\u4fdd\u5b58\u53ef\u4ee5\u552f\u4e00\u8bc6\u522b\u67e5\u8be2\u7684\u57fa\u672c\u4fe1\u606f\u3002\u56e0\u6b64\uff0c\u6bcf\u4e2a\u67e5\u8be2\u90fd\u9700\u8981\u4e0d\u540c\u7684\u68c0\u67e5\u70b9\u4f4d\u7f6e\uff0c\u4e0d\u80fd\u5728\u591a\u4e2a\u67e5\u8be2\u4e2d\u6307\u5b9a\u76f8\u540c\u7684\u4f4d\u7f6e\u3002\u6709\u5173\u8be6\u7ec6\u4fe1\u606f\uff0c\u8bf7\u53c2\u8003Structured Streaming\u7f16\u7a0b\u6307\u5357\u3002<\/p>\n<div>\u8bf7\u6ce8\u610f\uff0c\u5728\u8bb8\u591a\u8f93\u51fa\u7c7b\u578b\u7684Sink\u4e2d\uff0ccheckpointLocation\u9009\u9879\u662f\u5fc5\u9700\u7684\u3002\u4f46\u662f\u5728\u4e00\u4e9b\u50cf\u5185\u5b58Sink\u8fd9\u6837\u7684\u7c7b\u578b\u4e2d\uff0c\u5982\u679c\u4e0d\u6307\u5b9acheckpointLocation\uff0c\u5c06\u81ea\u52a8\u5728DBFS\u4e0a\u751f\u6210\u4e34\u65f6\u7684\u68c0\u67e5\u70b9\u5b58\u50a8\u4f4d\u7f6e\u3002\u4e34\u65f6\u7684\u68c0\u67e5\u70b9\u5b58\u50a8\u4f4d\u7f6e\u4e0d\u80fd\u4fdd\u8bc1\u5bb9\u9519\u6027\u548c\u6570\u636e\u4e00\u81f4\u6027\uff0c\u4e5f\u53ef\u80fd\u6ca1\u6709\u5f97\u5230\u9002\u5f53\u7684\u6e05\u9664\u3002\u4f5c\u4e3a\u6700\u4f73\u5b9e\u8df5\uff0c\u5efa\u8bae\u59cb\u7ec8\u6307\u5b9acheckpointLocation\u9009\u9879\u3002<\/div>\n<h2>\u8bbe\u7f6e\u5728\u6d41\u67e5\u8be2\u5931\u8d25\u65f6\u91cd\u65b0\u542f\u52a8\u7684\u4f5c\u4e1a\u3002<\/h2>\n<p>\u521b\u5efa\u4e00\u4e2a\u8fd0\u884c\u60a8\u7684\u6d41\u67e5\u8be2\u7b14\u8bb0\u672c\u4ee5\u53ca\u6267\u884cJAR\u7684Databricks\u4f5c\u4e1a\uff0c\u5e76\u6309\u7167\u4ee5\u4e0b\u65b9\u5f0f\u8fdb\u884c\u914d\u7f6e\u3002<\/p>\n<ul class=\"post-ul\">\n<li style=\"list-style-type: none;\">\n<ul 
class=\"post-ul\">\u5e38\u306b\u65b0\u898f\u30af\u30e9\u30b9\u30bf\u30fc\u3092\u4f7f\u7528<\/ul>\n<\/li>\n<\/ul>\n<p>&nbsp;<\/p>\n<ul class=\"post-ul\">\u5931\u6557\u6642\u306b\u306f\u5e38\u306b\u30ea\u30c8\u30e9\u30a4<\/ul>\n<p>\u5de5\u4f5c\u4e0e\u7ed3\u6784\u5316\u6d41API\u7d27\u5bc6\u96c6\u6210\uff0c\u53ef\u4ee5\u76d1\u89c6\u6d3b\u52a8\u7684\u6d41\u5f0f\u67e5\u8be2\u7684\u6240\u6709\u6267\u884c\u4f5c\u4e1a\u3002\u901a\u8fc7\u8fd9\u4e2a\u8bbe\u7f6e\uff0c\u5f53\u67d0\u4e2a\u67e5\u8be2\u53d1\u751f\u9519\u8bef\u65f6\uff0c\u4f5c\u4e1a\u4f1a\u81ea\u52a8\u505c\u6b62\u5904\u7406\uff08\u5305\u62ec\u5176\u4ed6\u67e5\u8be2\uff09\uff0c\u5e76\u5728\u65b0\u7684\u96c6\u7fa4\u4e0a\u542f\u52a8\u5904\u7406\u4ee5\u786e\u4fdd\u5904\u7406\u7684\u8fde\u7eed\u6027\u3002\u5728\u65b0\u7684\u6267\u884c\u4e2d\uff0c\u4f1a\u91cd\u65b0\u8fd0\u884c\u7b14\u8bb0\u672c\u6216JAR\u4ee3\u7801\uff0c\u5e76\u91cd\u65b0\u542f\u52a8\u6240\u6709\u7684\u67e5\u8be2\u3002\u8fd9\u662f\u786e\u4fdd\u6062\u590d\u6b63\u5e38\u72b6\u6001\u7684\u6700\u5b89\u5168\u7684\u65b9\u6cd5\u3002<\/p>\n<div>\u6ce8\u610f\uff01\u4e0d\u652f\u6301\u5728\u7b46\u8a18\u578b\u5de5\u4f5c\u6d41\u4e2d\u57f7\u884c\u9577\u6642\u9593\u8655\u7406\u7684\u4efb\u52d9\u3002\u56e0\u6b64\uff0c\u4e0d\u5efa\u8b70\u5728\u6d41\u5f0f\u4f5c\u696d\u4e2d\u4f7f\u7528\u7b46\u8a18\u578b\u5de5\u4f5c\u6d41\u3002<\/div>\n<div>\u8bf7\u6ce8\u610f\uff0c\u4efb\u4f55\u6d3b\u52a8\u7684\u6d41\u5f0f\u67e5\u8be2\u9519\u8bef\u90fd\u4f1a\u5bfc\u81f4\u6d3b\u52a8\u7684\u8fd0\u884c\u5931\u8d25\uff0c\u5e76\u505c\u6b62\u5176\u4ed6\u6240\u6709\u7684\u6d41\u5f0f\u67e5\u8be2\u3002\u5728\u7b14\u8bb0\u672c\u7684\u672b\u5c3e\uff0c\u4e0d\u9700\u8981\u4f7f\u7528streamingQuery.awaitTermination()\u6216spark.streams.awaitAnyTermination()\u3002\u5f53\u6d41\u5f0f\u67e5\u8be2\u5904\u4e8e\u6d3b\u52a8\u72b6\u6001\u65f6\uff0c\u4f5c\u4e1a\u4f1a\u81ea\u52a8\u907f\u514d\u5904\u7406\u7684\u505c\u6b62\u3002<\/div>\n<p>\u4ee5\u4e0b\u662f\u63a8\u8350\u7684\u5de5\u4f5c\u8bbe\u7f6e\u8be6\u7ec6\u4fe1\u6
06f\u3002<\/p>\n<p>Cluster: \u5e38\u306b\u65b0\u898f\u30af\u30e9\u30b9\u30bf\u30fc\u3092\u4f7f\u7528\u3057\u3001\u6700\u65b0\u306eSpark\u30d0\u30fc\u30b8\u30e7\u30f3(\u3042\u308b\u3044\u306f\u5c11\u306a\u304f\u3068\u3082\u30d0\u30fc\u30b8\u30e7\u30f32.1)\u3092\u4f7f\u7528\u3059\u308b\u3088\u3046\u306b\u8a2d\u5b9a\u3057\u307e\u3059\u3002Spark 2.1\u4ee5\u964d\u306e\u30af\u30a8\u30ea\u30fc\u306f\u3001\u30af\u30a8\u30ea\u30fc\u5f8c\u306e\u5fa9\u65e7\u304c\u53ef\u80fd\u3067\u3059\u3002<\/p>\n<p>Alerts: \u51e6\u7406\u5931\u6557\u6642\u306b\u30e1\u30fc\u30eb\u306e\u901a\u77e5\u3092\u53d7\u3051\u53d6\u308a\u305f\u3044\u5834\u5408\u306b\u8a2d\u5b9a\u3057\u307e\u3059\u3002<\/p>\n<p>Schedule: \u30b9\u30b1\u30b8\u30e5\u30fc\u30eb\u306f\u8a2d\u5b9a\u3057\u307e\u305b\u3093\u3002<\/p>\n<p>Timeout: *\u30bf\u30a4\u30e0\u30a2\u30a6\u30c8\u306f\u8a2d\u5b9a\u3057\u307e\u305b\u3093\u3002*\u30b9\u30c8\u30ea\u30fc\u30df\u30f3\u30b0\u30af\u30a8\u30ea\u30fc\u306f\u7121\u9650\u306b\u9577\u3044\u671f\u9593\u5b9f\u884c\u3055\u308c\u307e\u3059\u3002<\/p>\n<p>Maximum concurrent runs: 1\u306b\u8a2d\u5b9a\u3057\u307e\u3059\u3002\u305d\u308c\u305e\u308c\u306e\u30af\u30a8\u30ea\u30fc\u306b\u5bfe\u3057\u3066\u3001\u540c\u6642\u306b1\u3064\u306e\u30a4\u30f3\u30b9\u30bf\u30f3\u30b9\u306e\u307f\u304c\u30a2\u30af\u30c6\u30a3\u30d6\u3067\u3042\u308b\u3053\u3068\u3092\u8a31\u53ef\u3057\u307e\u3059\u3002<\/p>\n<p>Retries: Unlimited\u306b\u8a2d\u5b9a\u3057\u307e\u3059\u3002<\/p>\n<div><img decoding=\"async\" class=\"post-images\" title=\"\" src=\"https:\/\/cdn.silicloud.com\/blog-img\/blog\/img\/657d7ca7913a08637a69d0f4\/20-0.png\" alt=\"\" 
\/><\/div>\n<h2>\u6d41\u5a92\u4f53\u67e5\u8be2\u4e2d\u7684\u53d8\u66f4\u540e\u6062\u590d<\/h2>\n<p>\u5728\u6d41\u5f0f\u67e5\u8be2\u4e2d\uff0c\u5173\u4e8e\u4ece\u540c\u4e00\u68c0\u67e5\u70b9\u91cd\u65b0\u542f\u52a8\u65f6\u5141\u8bb8\u7684\u66f4\u6539\u5b58\u5728\u9650\u5236\u3002\u8fd9\u4e9b\u66f4\u6539\u7c7b\u578b\u53ef\u80fd\u662f\u4e0d\u5141\u8bb8\u7684\uff0c\u6216\u8005\u5176\u5f71\u54cd\u672a\u786e\u5b9a\u3002\u4ee5\u4e0b\u662f\u6240\u6709\u7c7b\u578b\u5171\u540c\u7684\u4e00\u4e9b\u65b9\u9762\u3002<\/p>\n<ul class=\"post-ul\">\n<li style=\"list-style-type: none;\">\n<ul class=\"post-ul\">\u7528\u8a9e\u300c\u8a31\u53ef\u3055\u308c\u308b\u300d\u306f\u3001\u7279\u5b9a\u306e\u5909\u66f4\u3092\u884c\u3046\u3053\u3068\u306f\u3067\u304d\u307e\u3059\u304c\u3001\u52b9\u679c\u306e\u30bb\u30de\u30f3\u30c6\u30a3\u30af\u30b9(\u610f\u5473)\u306f\u30af\u30a8\u30ea\u30fc\u3068\u5909\u66f4\u5185\u5bb9\u306b\u57fa\u3065\u3044\u3066\u660e\u78ba\u306b\u5b9a\u7fa9\u3055\u308c\u308b(well-defined)\u3053\u3068\u3092\u610f\u5473\u3057\u307e\u3059\u3002<\/ul>\n<\/li>\n<\/ul>\n<p>&nbsp;<\/p>\n<ul class=\"post-ul\">\n<li style=\"list-style-type: none;\">\n<ul class=\"post-ul\">\u7528\u8a9e\u300c\u8a31\u53ef\u3055\u308c\u306a\u3044\u300d\u306f\u3001\u4e88\u671f\u3055\u308c\u306a\u3044\u30a8\u30e9\u30fc\u306b\u3088\u308a\u30af\u30a8\u30ea\u30fc\u306e\u518d\u8d77\u52d5\u304c\u5931\u6557\u3059\u308b\u53ef\u80fd\u6027\u304c\u9ad8\u3044\u305f\u3081\u3001\u7279\u5b9a\u306e\u5909\u66f4\u3092\u884c\u3046\u3079\u304d\u3067\u306f\u306a\u3044\u3053\u3068\u3092\u610f\u5473\u3057\u307e\u3059\u3002<\/ul>\n<\/li>\n<\/ul>\n<p>sdf\u306fsparkSession.readStream\u306b\u3088\u3063\u3066\u4f5c\u6210\u3055\u308c\u308b\u30b9\u30c8\u30ea\u30fc\u30df\u30f3\u30b0\u30c7\u30fc\u30bf\u30d5\u30ec\u30fc\u30e0\/\u30c7\u30fc\u30bf\u30bb\u30c3\u30c8\u3092\u610f\u5473\u3057\u307e\u3059\u3002<\/p>\n<h2>\u6539\u53d8\u7684\u7c7b\u578b 
<\/h2>\n<p>\u5165\u529b\u30bd\u30fc\u30b9\u306e\u6570\u3084\u578b\u306e\u5909\u66f4(\u7570\u306a\u308b\u30bd\u30fc\u30b9\u306a\u3069): \u3053\u308c\u306f\u8a31\u53ef\u3055\u308c\u307e\u305b\u3093\u3002<\/p>\n<p>\u5165\u529b\u30bd\u30fc\u30b9\u306e\u30d1\u30e9\u30e1\u30fc\u30bf\u30fc\u306e\u5909\u66f4: \u5909\u66f4\u306e\u30bb\u30de\u30f3\u30c6\u30a3\u30af\u30b9\u304cwell-defined\u306b\u306a\u308b\u304b\u3069\u3046\u304b\u306f\u3001\u30bd\u30fc\u30b9\u3068\u30af\u30a8\u30ea\u30fc\u306b\u4f9d\u5b58\u3057\u307e\u3059\u3002\u3053\u3061\u3089\u306b\u3044\u304f\u3064\u304b\u306e\u4f8b\u3092\u793a\u3057\u307e\u3059\u3002<\/p>\n<p>\u30ec\u30fc\u30c8\u30ea\u30df\u30c3\u30c8\u306e\u8ffd\u52a0\u3001\u524a\u9664\u3001\u5909\u66f4\u306f\u8a31\u53ef\u3055\u308c\u307e\u3059\u3002<\/p>\n<p>Scala<br \/>\nspark.readStream.format(&#8220;kafka&#8221;).option(&#8220;subscribe&#8221;, &#8220;topic&#8221;)<\/p>\n<p>\u304b\u3089\u4ee5\u4e0b\u3078\u306e\u5909\u66f4\u306f\u8a31\u53ef\u3055\u308c\u307e\u3059\u3002<\/p>\n<p>Scala<br \/>\nspark.readStream.format(&#8220;kafka&#8221;).option(&#8220;subscribe&#8221;, &#8220;topic&#8221;).option(&#8220;maxOffsetsPerTrigger&#8221;, &#8230;)<\/p>\n<p>\u30b5\u30d6\u30b9\u30af\u30e9\u30a4\u30d6\u3055\u308c\u305f\u30c8\u30d4\u30c3\u30af\u3001\u30d5\u30a1\u30a4\u30eb\u306e\u5909\u66f4\u306f\u3001\u7d50\u679c\u304c\u4e88\u671f\u3055\u308c\u306a\u3044\u305f\u3081\u3001\u901a\u5e38\u306f\u8a31\u53ef\u3055\u308c\u307e\u305b\u3093\u3002spark.readStream.format(&#8220;kafka&#8221;).option(&#8220;subscribe&#8221;, &#8220;topic&#8221;)\u304b\u3089spark.readStream.format(&#8220;kafka&#8221;).option(&#8220;subscribe&#8221;, &#8220;newTopic&#8221;)\u3078\u306e\u5909\u66f4\u306f\u8a31\u53ef\u3055\u308c\u307e\u305b\u3093\u3002<\/p>\n<p>\u51fa\u529b\u30b7\u30f3\u30af\u306e\u5909\u66f4: 
\u3044\u304f\u3064\u304b\u306e\u7279\u5b9a\u306e\u30b7\u30f3\u30af\u306e\u7d44\u307f\u5408\u308f\u305b\u9593\u306e\u5909\u66f4\u306f\u8a31\u53ef\u3055\u308c\u307e\u3059\u3002\u30b1\u30fc\u30b9\u30d0\u30a4\u30b1\u30fc\u30b9\u3067\u691c\u8a3c\u3059\u308b\u5fc5\u8981\u304c\u3042\u308a\u307e\u3059\u3002\u4ee5\u4e0b\u306b\u3044\u304f\u3064\u304b\u4f8b\u3092\u793a\u3057\u307e\u3059\u3002<\/p>\n<p>\u30d5\u30a1\u30a4\u30eb\u30b7\u30f3\u30af\u304b\u3089Kafka\u30b7\u30f3\u30af\u3078\u306e\u5909\u66f4\u306f\u8a31\u53ef\u3055\u308c\u307e\u3059\u3002Kafka\u306f\u65b0\u898f\u30c7\u30fc\u30bf\u306e\u307f\u3092\u53c2\u7167\u3057\u307e\u3059\u3002<br \/>\nKafka\u30b7\u30f3\u30af\u304b\u3089\u30d5\u30a1\u30a4\u30eb\u30b7\u30f3\u30af\u3078\u306e\u5909\u66f4\u306f\u8a31\u53ef\u3055\u308c\u307e\u305b\u3093\u3002<br \/>\nKafka\u30b7\u30f3\u30af\u3068foreach\u306e\u76f8\u4e92\u5909\u63db\u306f\u8a31\u53ef\u3055\u308c\u307e\u3059\u3002<\/p>\n<p>\u51fa\u529b\u30b7\u30f3\u30af\u306e\u30d1\u30e9\u30e1\u30fc\u30bf\u30fc\u306e\u5909\u66f4: \u8a31\u53ef\u3055\u308c\u308b\u304b\u3069\u3046\u304b\u3001\u5909\u66f4\u306e\u30bb\u30de\u30f3\u30c6\u30a3\u30af\u30b9\u304cwell-defined\u306b\u306a\u308b\u304b\u3069\u3046\u304b\u306f\u3001\u30b7\u30f3\u30af\u3068\u30af\u30a8\u30ea\u30fc\u306b\u4f9d\u5b58\u3057\u307e\u3059\u3002\u4ee5\u4e0b\u306b\u4f8b\u3092\u793a\u3057\u307e\u3059\u3002<\/p>\n<p>\u30d5\u30a1\u30a4\u30eb\u30b7\u30f3\u30af\u306e\u51fa\u529b\u30c7\u30a3\u30ec\u30af\u30c8\u30ea\u306e\u5909\u66f4\u306f\u8a31\u53ef\u3055\u308c\u307e\u305b\u3093\u3002sdf.writeStream.format(&#8220;parquet&#8221;).option(&#8220;path&#8221;, &#8220;\/somePath&#8221;)\u304b\u3089sdf.writeStream.format(&#8220;parquet&#8221;).option(&#8220;path&#8221;, &#8220;\/anotherPath&#8221;)\u3078\u306e\u5909\u66f4\u306f\u8a31\u53ef\u3055\u308c\u307e\u305b\u3093\u3002<br 
\/>\n\u51fa\u529b\u30c8\u30d4\u30c3\u30af\u306e\u5909\u66f4\u306f\u8a31\u53ef\u3055\u308c\u307e\u3059\u3002sdf.writeStream.format(&#8220;kafka&#8221;).option(&#8220;topic&#8221;, &#8220;someTopic&#8221;)\u304b\u3089sdf.writeStream.format(&#8220;kafka&#8221;).option(&#8220;topic&#8221;, &#8220;anotherTopic&#8221;)\u3078\u306e\u5909\u66f4\u306f\u8a31\u53ef\u3055\u308c\u307e\u3059\u3002<br \/>\n\u30e6\u30fc\u30b6\u30fc\u5b9a\u7fa9\u306eforeach\u30b7\u30f3\u30af(\u3059\u306a\u308f\u3061ForeachWriter\u30b3\u30fc\u30c9)\u3078\u306e\u5909\u66f4\u306f\u8a31\u53ef\u3055\u308c\u307e\u3059\u304c\u3001\u5909\u66f4\u306e\u30bb\u30de\u30f3\u30c6\u30a3\u30af\u30b9\u306f\u30b3\u30fc\u30c9\u306b\u4f9d\u5b58\u3057\u307e\u3059\u3002<\/p>\n<p>projection \/ filter \/ map\u30e9\u30a4\u30af\u306a\u30aa\u30da\u30ec\u30fc\u30b7\u30e7\u30f3\u306b\u304a\u3051\u308b\u5909\u66f4: \u3044\u304f\u3064\u304b\u306e\u30b1\u30fc\u30b9\u306f\u8a31\u53ef\u3055\u308c\u307e\u3059\u3002\u4f8b\u3048\u3070\u3001<\/p>\n<p>\u30d5\u30a3\u30eb\u30bf\u30fc\u306e\u8ffd\u52a0\u524a\u9664\u306f\u8a31\u53ef\u3055\u308c\u307e\u3059: sdf.selectExpr(&#8220;a&#8221;)\u304b\u3089sdf.where(&#8230;).selectExpr(&#8220;a&#8221;).filter(&#8230;)\u3078\u306e\u5909\u66f4\u306f\u8a31\u53ef\u3055\u308c\u307e\u3059\u3002<br \/>\n\u540c\u3058\u51fa\u529b\u30b9\u30ad\u30fc\u30de\u306b\u3088\u308bprojection\u306e\u5909\u66f4\u306f\u8a31\u53ef\u3055\u308c\u307e\u3059: sdf.selectExpr(&#8220;stringColumn AS json&#8221;).writeStream\u304b\u3089sdf.select(to_json(&#8230;).as(&#8220;json&#8221;)).writeStream\u3078\u306e\u5909\u66f4\u306f\u8a31\u53ef\u3055\u308c\u307e\u3059\u3002<br \/>\n\u7570\u306a\u308b\u51fa\u529b\u30b9\u30ad\u30fc\u30de\u306b\u3088\u308bprojection\u306e\u5909\u66f4\u306f\u6761\u4ef6\u4ed8\u304d\u3067\u8a31\u53ef\u3055\u308c\u307e\u3059: 
\u51fa\u529b\u30b7\u30f3\u30af\u304c&#8221;a&#8221;\u304b\u3089&#8221;b&#8221;\u3078\u306e\u30b9\u30ad\u30fc\u30de\u5909\u66f4\u3092\u8a31\u53ef\u3057\u3066\u3044\u308b\u306e\u3067\u3042\u308c\u3070\u3001sdf.selectExpr(&#8220;a&#8221;).writeStream\u304b\u3089sdf.selectExpr(&#8220;b&#8221;).writeStream\u3078\u306e\u5909\u66f4\u306f\u8a31\u53ef\u3055\u308c\u307e\u3059\u3002<\/p>\n<p>\u30b9\u30c6\u30fc\u30c8\u30d5\u30eb\u306a\u30aa\u30da\u30ec\u30fc\u30b7\u30e7\u30f3\u306b\u304a\u3051\u308b\u5909\u66f4: \u7d50\u679c\u3092\u7d99\u7d9a\u7684\u306b\u66f4\u65b0\u3059\u308b\u305f\u3081\u306b\u3001\u30b9\u30c8\u30ea\u30fc\u30df\u30f3\u30b0\u30af\u30a8\u30ea\u30fc\u306b\u304a\u3051\u308b\u5e7e\u3064\u304b\u306e\u30aa\u30da\u30ec\u30fc\u30b7\u30e7\u30f3\u306f\u30b9\u30c6\u30fc\u30c8\u30c7\u30fc\u30bf\u3092\u7dad\u6301\u3059\u308b\u5fc5\u8981\u304c\u3042\u308a\u307e\u3059\u3002\u69cb\u9020\u5316\u30b9\u30c8\u30ea\u30fc\u30df\u30f3\u30b0\u306f\u81ea\u52d5\u3067\u72b6\u614b\u306e\u30c1\u30a7\u30c3\u30af\u30dd\u30a4\u30f3\u30c8\u3092\u30d5\u30a9\u30fc\u30eb\u30c8\u30c8\u30ec\u30e9\u30f3\u30c8\u306a\u30b9\u30c8\u30ec\u30fc\u30b8(\u4f8b\u3048\u3070\u3001DBFS\u3001AWS S3\u3001Azure Blob 
storage)\u306b\u4f5c\u6210\u3057\u3001\u518d\u8d77\u52d5\u5f8c\u306b\u30ec\u30b9\u30c8\u30a2\u3057\u307e\u3059\u3002\u3057\u304b\u3057\u3001\u30b9\u30c6\u30fc\u30c8\u30c7\u30fc\u30bf\u306e\u30b9\u30ad\u30fc\u30de\u306f\u518d\u8d77\u52d5\u306e\u5408\u9593\u3067\u5909\u66f4\u304c\u306a\u3044\u3053\u3068\u3092\u524d\u63d0\u3068\u3057\u3066\u3044\u307e\u3059\u3002\u3053\u308c\u306f\u3001\u518d\u8d77\u52d5\u306e\u5408\u9593\u306b\u30b9\u30c8\u30ea\u30fc\u30df\u30f3\u30b0\u30af\u30a8\u30ea\u30fc\u306e\u30b9\u30c6\u30fc\u30c8\u30d5\u30eb\u30aa\u30da\u30ec\u30fc\u30b7\u30e7\u30f3\u306b\u5bfe\u3059\u308b\u3044\u304b\u306a\u308b\u5909\u66f4(\u8ffd\u52a0\u3001\u524a\u9664\u3001\u30b9\u30ad\u30fc\u30de\u5909\u66f4)\u3082\u8a31\u53ef\u3055\u308c\u306a\u3044\u3053\u3068\u3092\u610f\u5473\u3057\u307e\u3059\u3002\u30b9\u30c6\u30fc\u30c8\u306e\u30ea\u30ab\u30d0\u30ea\u30fc\u3092\u78ba\u5b9f\u306b\u3059\u308b\u305f\u3081\u306b\u3001\u518d\u8d77\u52d5\u306e\u5408\u9593\u306b\u30b9\u30ad\u30fc\u30de\u3092\u5909\u66f4\u3059\u3079\u304d\u3067\u306f\u306a\u3044\u30b9\u30c6\u30fc\u30c8\u30d5\u30eb\u30aa\u30da\u30ec\u30fc\u30b7\u30e7\u30f3\u306e\u30ea\u30b9\u30c8\u3092\u793a\u3057\u307e\u3059\u3002<\/p>\n<p>\u30b9\u30c8\u30ea\u30fc\u30df\u30f3\u30b0\u306e\u96c6\u8a08: \u4f8b\u3048\u3070\u3001sdf.groupBy(&#8220;a&#8221;).agg(&#8230;)\u3002\u30b0\u30eb\u30fc\u30d4\u30f3\u30b0\u306e\u30ad\u30fc\u3001\u96c6\u8a08\u306e\u6570\u3084\u30bf\u30a4\u30d7\u306e\u5909\u66f4\u306f\u8a31\u53ef\u3055\u308c\u307e\u305b\u3093\u3002<\/p>\n<p>\u30b9\u30c8\u30ea\u30fc\u30df\u30f3\u30b0\u306e\u91cd\u8907\u6392\u9664: \u4f8b\u3048\u3070\u3001sdf.dropDuplicates(&#8220;a&#8221;)\u3002\u30b0\u30eb\u30fc\u30d4\u30f3\u30b0\u306e\u30ad\u30fc\u3001\u96c6\u8a08\u306e\u6570\u3084\u30bf\u30a4\u30d7\u306e\u5909\u66f4\u306f\u8a31\u53ef\u3055\u308c\u307e\u305b\u3093\u3002<\/p>\n<p>\u30b9\u30c8\u30ea\u30fc\u30e0\u3068\u30b9\u30c8\u30ea\u30fc\u30e0\u306ejoin: \u4f8b\u3048\u3070\u3001sdf1.join(sdf2, &#8230;) 
(\u3053\u3053\u3067\u306f\u4e21\u65b9\u306e\u5165\u529b\u304csparkSession.readStream\u3067\u751f\u6210\u3055\u308c\u307e\u3059)\u3002\u30b9\u30ad\u30fc\u30de\u3084join\u30ab\u30e9\u30e0\u306e\u5909\u66f4\u306f\u8a31\u53ef\u3055\u308c\u307e\u305b\u3093\u3002join\u30bf\u30a4\u30d7(inner\/outer)\u306e\u5909\u66f4\u306f\u8a31\u53ef\u3055\u308c\u307e\u305b\u3093\u3002\u4ed6\u306ejoin\u306e\u6761\u4ef6\u306e\u5909\u66f4\u306e\u7d50\u679c\u3082\u672a\u5b9a\u306b\u306a\u308b\u53ef\u80fd\u6027\u304c\u3042\u308a\u307e\u3059\u3002<\/p>\n<p>\u4efb\u610f\u306e\u30b9\u30c6\u30fc\u30c8\u30d5\u30eb\u30aa\u30da\u30ec\u30fc\u30b7\u30e7\u30f3: \u4f8b\u3048\u3070\u3001sdf.groupByKey(&#8230;).mapGroupsWithState(&#8230;)\u3084 sdf.groupByKey(&#8230;).flatMapGroupsWithState(&#8230;)\u3002\u30e6\u30fc\u30b6\u30fc\u5b9a\u7fa9\u306e\u30b9\u30c6\u30fc\u30c8\u306e\u30b9\u30ad\u30fc\u30de\u3084\u30bf\u30a4\u30e0\u30a2\u30a6\u30c8\u306e\u30bf\u30a4\u30d7\u306e\u5909\u66f4\u306f\u8a31\u53ef\u3055\u308c\u307e\u305b\u3093\u3002\u30e6\u30fc\u30b6\u30fc\u5b9a\u7fa9\u306e\u30b9\u30c6\u30fc\u30c8\u30de\u30c3\u30d4\u30f3\u30b0\u95a2\u6570\u306b\u304a\u3051\u308b\u3044\u304b\u306a\u308b\u5909\u66f4\u306f\u8a31\u53ef\u3055\u308c\u307e\u3059\u304c\u3001\u5909\u66f4\u306e\u30bb\u30de\u30f3\u30c6\u30a3\u30c3\u30af\u306a\u5f71\u97ff\u306f\u3001\u30e6\u30fc\u30b6\u30fc\u5b9a\u7fa9\u30ed\u30b8\u30c3\u30af\u306b\u4f9d\u5b58\u3057\u307e\u3059\u3002\u30b9\u30c6\u30fc\u30c8\u306e\u30b9\u30ad\u30fc\u30de\u5909\u66f4\u3092\u30b5\u30dd\u30fc\u30c8\u3057\u305f\u3044\u306e\u3067\u3042\u308c\u3070\u3001\u660e\u793a\u7684\u306b\u8907\u96d1\u306a\u30b9\u30c6\u30fc\u30c8\u30c7\u30fc\u30bf\u306e\u69cb\u9020\u3092\u3001\u30b9\u30ad\u30fc\u30de\u30de\u30a4\u30b0\u30ec\u30fc\u30b7\u30e7\u30f3\u3092\u30b5\u30dd\u30fc\u30c8\u3059\u308b\u30a8\u30f3\u30b3\u30fc\u30c7\u30a3\u30f3\u30b0\/\u30c7\u30b3\u30fc\u30c7\u30a3\u30f3\u30b0\u30b9\u30ad\u30fc\u30de\u3092\u7528\u3044\u3066\u3001\u30d0\u30a4\u30c8\u30b3\u30fc\u30c9\u306b\u30a8\u30
f3\u30b3\u30fc\u30c9\/\u30c7\u30b3\u30fc\u30c9\u3059\u308b\u3053\u3068\u304c\u3067\u304d\u307e\u3059\u3002\u4f8b\u3048\u3070\u3001Avro\u30a8\u30f3\u30b3\u30fc\u30c9\u3055\u308c\u305f\u30d0\u30a4\u30c8\u30b3\u30fc\u30c9\u3068\u3057\u3066\u30b9\u30c6\u30fc\u30c8\u3092\u4fdd\u5b58\u3059\u308b\u306e\u3067\u3042\u308c\u3070\u3001\u30d0\u30a4\u30ca\u30ea\u30fc\u306e\u30b9\u30c6\u30fc\u30c8\u306f\u5e38\u306b\u554f\u984c\u306a\u304f\u30ec\u30b9\u30c8\u30a2\u3055\u308c\u308b\u306e\u3067\u3001\u30af\u30a8\u30ea\u30fc\u518d\u8d77\u52d5\u306e\u5408\u9593\u306bAvro\u306e\u30b9\u30c6\u30fc\u30c8\u30b9\u30ad\u30fc\u30de\u3092\u81ea\u7531\u306b\u5909\u66f4\u3059\u308b\u3053\u3068\u304c\u3067\u304d\u307e\u3059\u3002<\/p>\n<h1>\u5bf9\u4e8e\u6d41\u67e5\u8be2\u7684\u76d1\u63a7<\/h1>\n<p>\u60a8\u53ef\u4ee5\u901a\u8fc7Streaming\u6807\u7b7e\u4e0b\u7684Spark UI\u6765\u76d1\u63a7\u6d41\u5f0f\u5e94\u7528\u7a0b\u5e8f\u3002\u901a\u8fc7\u5728df.writeStream.queryName(&lt;\u67e5\u8be2\u540d\u79f0&gt;)\u4e2d\u4e3a\u6d41\u5f0f\u67e5\u8be2\u547d\u540d\uff0c\u60a8\u53ef\u4ee5\u5728Spark UI\u4e2d\u67e5\u770b\u54ea\u4e9b\u6307\u6807\u5c5e\u4e8e\u54ea\u4e2a\u6d41\u5f0f\u67e5\u8be2\u3002<\/p>\n<p>\u901a\u8fc7\u4f7f\u7528Apache Spark\u7684Streaming Query Listener\u63a5\u53e3\uff0c\u53ef\u4ee5\u5c06\u6d41\u5f0f\u6307\u6807\u63a8\u9001\u5230\u5916\u90e8\u670d\u52a1\uff0c\u7528\u4e8e\u8b66\u62a5\u548c\u4eea\u8868\u677f\u3002Streaming Query Listener\u63a5\u53e3\u53ea\u80fd\u5728Scala\u4e2d\u4f7f\u7528\u3002<\/p>\n<pre class=\"post-pre\"><code><span class=\"k\">import<\/span> <span class=\"nn\">org.apache.spark.sql.streaming.StreamingQueryListener<\/span>\r\n<span class=\"k\">import<\/span> <span class=\"nn\">org.apache.spark.sql.streaming.StreamingQueryListener._<\/span>\r\n\r\n<span class=\"k\">val<\/span> <span class=\"nv\">myListener<\/span> <span class=\"k\">=<\/span> <span class=\"k\">new<\/span> <span class=\"nc\">StreamingQueryListener<\/span> <span class=\"o\">{<\/span>\r\n\r\n  <span 
class=\"cm\">\/**\r\n   * Called when a query is started.\r\n   * @note This is called synchronously with\r\n   *       [[org.apache.spark.sql.streaming.DataStreamWriter `DataStreamWriter.start()`]],\r\n   *       that is, `onQueryStart` will be called on all listeners before\r\n   *       `DataStreamWriter.start()` returns the corresponding [[StreamingQuery]].\r\n   *        Do not block in this method as it will block your query.\r\n   *\/<\/span>\r\n  <span class=\"k\">def<\/span> <span class=\"nf\">onQueryStarted<\/span><span class=\"o\">(<\/span><span class=\"n\">event<\/span><span class=\"k\">:<\/span> <span class=\"kt\">QueryStartedEvent<\/span><span class=\"o\">)<\/span><span class=\"k\">:<\/span> <span class=\"kt\">Unit<\/span> <span class=\"o\">=<\/span> <span class=\"o\">{}<\/span>\r\n\r\n  <span class=\"cm\">\/**\r\n   * Called when there is some status update (ingestion rate updated, etc.)\r\n   *\r\n   * @note This method is asynchronous. The status in [[StreamingQuery]] will always be\r\n   *       latest no matter when this method is called. Therefore, the status of [[StreamingQuery]]\r\n   *       may be changed before\/when you process the event. 
For example, you may find [[StreamingQuery]]\r\n   *       is terminated when you are processing `QueryProgressEvent`.\r\n   *\/<\/span>\r\n  <span class=\"k\">def<\/span> <span class=\"nf\">onQueryProgress<\/span><span class=\"o\">(<\/span><span class=\"n\">event<\/span><span class=\"k\">:<\/span> <span class=\"kt\">QueryProgressEvent<\/span><span class=\"o\">)<\/span><span class=\"k\">:<\/span> <span class=\"kt\">Unit<\/span> <span class=\"o\">=<\/span> <span class=\"o\">{}<\/span>\r\n\r\n  <span class=\"cm\">\/**\r\n   * Called when a query is stopped, with or without error.\r\n   *\/<\/span>\r\n  <span class=\"k\">def<\/span> <span class=\"nf\">onQueryTerminated<\/span><span class=\"o\">(<\/span><span class=\"n\">event<\/span><span class=\"k\">:<\/span> <span class=\"kt\">QueryTerminatedEvent<\/span><span class=\"o\">)<\/span><span class=\"k\">:<\/span> <span class=\"kt\">Unit<\/span> <span class=\"o\">=<\/span> <span class=\"o\">{}<\/span>\r\n<span class=\"o\">}<\/span>\r\n<\/code><\/pre>\n<h2>\u89c2\u6d4b\u65b9\u6cd5<\/h2>\n<p>\u53ef\u4ee5\u89c2\u6d4b\u7684\u5ea6\u91cf\u53ef\u4ee5\u901a\u8fc7\u67e5\u8be2\uff08\u6570\u636e\u6846\uff09\u6765\u5b9a\u4e49\u7684\u5e26\u6709\u547d\u540d\u7684\u4efb\u610f\u805a\u5408\u51fd\u6570\u3002\u5f53\u6570\u636e\u6846\u7684\u6267\u884c\u8fbe\u5230\u5b8c\u6210\u70b9\u65f6\uff0c\u5c06\u89e6\u53d1\u4e00\u4e2a\u5e26\u6709\u547d\u540d\u4e8b\u4ef6\u7684\u7edf\u8ba1\u6307\u6807\uff0c\u8be5\u4e8b\u4ef6\u5305\u542b\u81ea\u4e0a\u6b21\u5b8c\u6210\u70b9\u4ee5\u6765\u5904\u7406\u7684\u6570\u636e\u7684\u7edf\u8ba1\u6307\u6807\u3002<\/p>\n<p>\u901a\u8fc7\u4e3a Spark \u4f1a\u8bdd\u9644\u52a0\u76d1\u542c\u5668\u53ef\u4ee5\u89c2\u5bdf\u8fd9\u4e9b\u5ea6\u91cf\u503c\u3002\u76d1\u542c\u5668\u7684\u884c\u4e3a\u53d6\u51b3\u4e8e\u6267\u884c\u6a21\u5f0f\u3002<\/p>\n<p>\u30d0\u30c3\u30c1\u30e2\u30fc\u30c9: QueryExecutionListener\u3092\u4f7f\u3044\u307e\u3059\u3002<br 
\/>\n\u30af\u30a8\u30ea\u30fc\u304c\u5b8c\u4e86\u3059\u308b\u3068QueryExecutionListener\u304c\u30b3\u30fc\u30eb\u3055\u308c\u307e\u3059\u3002QueryExecution.observedMetrics map\u3092\u7528\u3044\u3066\u30e1\u30c8\u30ea\u30af\u30b9\u306b\u30a2\u30af\u30bb\u30b9\u3057\u307e\u3059\u3002<\/p>\n<p>\u30b9\u30c8\u30ea\u30fc\u30df\u30f3\u30b0\u3001\u3042\u308b\u3044\u306f\u30de\u30a4\u30af\u30ed\u30d0\u30c3\u30c1: StreamingQueryListener\u3092\u4f7f\u3044\u307e\u3059\u3002<br \/>\n\u30b9\u30c8\u30ea\u30fc\u30df\u30f3\u30b0\u30af\u30a8\u30ea\u30fc\u304c\u30a8\u30dd\u30c3\u30af\u3092\u5b8c\u4e86\u3059\u308b\u3068StreamingQueryListener\u304c\u30b3\u30fc\u30eb\u3055\u308c\u307e\u3059\u3002StreamingQueryProgress.observedMetrics map\u3092\u7528\u3044\u3066\u30e1\u30c8\u30ea\u30af\u30b9\u306b\u30a2\u30af\u30bb\u30b9\u3057\u307e\u3059\u3002Databricks\u3067\u306f\u9023\u7d9a\u5b9f\u884c\u30b9\u30c8\u30ea\u30fc\u30df\u30f3\u30b0\u3092\u30b5\u30dd\u30fc\u30c8\u3057\u3066\u3044\u307e\u305b\u3093\u3002<\/p>\n<p>\u9019\u88e1\u63d0\u4f9b\u4e00\u500b\u4f8b\u5b50\u3002<\/p>\n<pre class=\"post-pre\"><code><span class=\"c1\">\/\/ Observe row count (rc) and error row count (erc) in the streaming Dataset<\/span>\r\n<span class=\"k\">val<\/span> <span class=\"nv\">observed_ds<\/span> <span class=\"k\">=<\/span> <span class=\"nv\">ds<\/span><span class=\"o\">.<\/span><span class=\"py\">observe<\/span><span class=\"o\">(<\/span><span class=\"s\">\"my_event\"<\/span><span class=\"o\">,<\/span> <span class=\"nf\">count<\/span><span class=\"o\">(<\/span><span class=\"nf\">lit<\/span><span class=\"o\">(<\/span><span class=\"mi\">1<\/span><span class=\"o\">)).<\/span><span class=\"py\">as<\/span><span class=\"o\">(<\/span><span class=\"s\">\"rc\"<\/span><span class=\"o\">),<\/span> <span class=\"nf\">count<\/span><span class=\"o\">(<\/span><span class=\"n\">$<\/span><span class=\"s\">\"error\"<\/span><span class=\"o\">).<\/span><span class=\"py\">as<\/span><span class=\"o\">(<\/span><span 
class=\"s\">\"erc\"<\/span><span class=\"o\">))<\/span>\r\n<span class=\"nv\">observed_ds<\/span><span class=\"o\">.<\/span><span class=\"py\">writeStream<\/span><span class=\"o\">.<\/span><span class=\"py\">format<\/span><span class=\"o\">(<\/span><span class=\"s\">\"...\"<\/span><span class=\"o\">).<\/span><span class=\"py\">start<\/span><span class=\"o\">()<\/span>\r\n\r\n<span class=\"c1\">\/\/ Monitor the metrics using a listener<\/span>\r\n<span class=\"nv\">spark<\/span><span class=\"o\">.<\/span><span class=\"py\">streams<\/span><span class=\"o\">.<\/span><span class=\"py\">addListener<\/span><span class=\"o\">(<\/span><span class=\"k\">new<\/span> <span class=\"nc\">StreamingQueryListener<\/span><span class=\"o\">()<\/span> <span class=\"o\">{<\/span>\r\n  <span class=\"k\">override<\/span> <span class=\"k\">def<\/span> <span class=\"nf\">onQueryProgress<\/span><span class=\"o\">(<\/span><span class=\"n\">event<\/span><span class=\"k\">:<\/span> <span class=\"kt\">QueryProgressEvent<\/span><span class=\"o\">)<\/span><span class=\"k\">:<\/span> <span class=\"kt\">Unit<\/span> <span class=\"o\">=<\/span> <span class=\"o\">{<\/span>\r\n    <span class=\"nv\">event<\/span><span class=\"o\">.<\/span><span class=\"py\">progress<\/span><span class=\"o\">.<\/span><span class=\"py\">observedMetrics<\/span><span class=\"o\">.<\/span><span class=\"py\">get<\/span><span class=\"o\">(<\/span><span class=\"s\">\"my_event\"<\/span><span class=\"o\">).<\/span><span class=\"py\">foreach<\/span> <span class=\"o\">{<\/span> <span class=\"n\">row<\/span> <span class=\"k\">=&gt;<\/span>\r\n      <span class=\"c1\">\/\/ Trigger if the number of errors exceeds 5 percent<\/span>\r\n      <span class=\"k\">val<\/span> <span class=\"nv\">num_rows<\/span> <span class=\"k\">=<\/span> <span class=\"nv\">row<\/span><span class=\"o\">.<\/span><span class=\"py\">getAs<\/span><span class=\"o\">[<\/span><span class=\"kt\">Long<\/span><span class=\"o\">](<\/span><span 
class=\"s\">\"rc\"<\/span><span class=\"o\">)<\/span>\r\n      <span class=\"k\">val<\/span> <span class=\"nv\">num_error_rows<\/span> <span class=\"k\">=<\/span> <span class=\"nv\">row<\/span><span class=\"o\">.<\/span><span class=\"py\">getAs<\/span><span class=\"o\">[<\/span><span class=\"kt\">Long<\/span><span class=\"o\">](<\/span><span class=\"s\">\"erc\"<\/span><span class=\"o\">)<\/span>\r\n      <span class=\"k\">val<\/span> <span class=\"nv\">ratio<\/span> <span class=\"k\">=<\/span> <span class=\"nv\">num_error_rows<\/span><span class=\"o\">.<\/span><span class=\"py\">toDouble<\/span> <span class=\"o\">\/<\/span> <span class=\"n\">num_rows<\/span>\r\n      <span class=\"nf\">if<\/span> <span class=\"o\">(<\/span><span class=\"n\">ratio<\/span> <span class=\"o\">&gt;<\/span> <span class=\"mf\">0.05<\/span><span class=\"o\">)<\/span> <span class=\"o\">{<\/span>\r\n        <span class=\"c1\">\/\/ Trigger alert<\/span>\r\n      <span class=\"o\">}<\/span>\r\n    <span class=\"o\">}<\/span>\r\n  <span class=\"o\">}<\/span>\r\n<span class=\"o\">})<\/span>\r\n<\/code><\/pre>\n<h1>\u4e3a\u4e86\u63d0\u9ad8\u6548\u7387\uff0c\u8bbe\u7f6eApache 
Spark\u8c03\u5ea6\u7a0b\u5e8f\u6c60\u3002<\/h1>\n<p>\u9ed8\u8ba4\u60c5\u51b5\u4e0b\uff0c\u6240\u6709\u5728\u7b14\u8bb0\u672c\u4e2d\u542f\u52a8\u7684\u67e5\u8be2\u90fd\u5c06\u5728\u540c\u4e00\u516c\u5e73\u8c03\u5ea6\u6c60\u4e2d\u6267\u884c\u3002\u56e0\u6b64\uff0c\u6240\u6709\u5728\u7b14\u8bb0\u672c\u4e2d\u9010\u4e2a\u6267\u884c\u7684\u6d41\u67e5\u8be2\u5c06\u6309\u7167\u5148\u8fdb\u5148\u51fa\uff08FIFO\uff09\u7684\u65b9\u5f0f\u8fdb\u884c\u5904\u7406\u3002\u901a\u8fc7\u4e0d\u5728\u67e5\u8be2\u4e4b\u95f4\u6709\u6548\u5171\u4eab\u96c6\u7fa4\u8d44\u6e90\uff0c\u53ef\u80fd\u4f1a\u5bfc\u81f4\u67e5\u8be2\u4e0d\u5fc5\u8981\u7684\u5ef6\u8fdf\u3002<\/p>\n<p>\u4e3a\u4e86\u4f7f\u6240\u6709\u7684\u6d41\u67e5\u8be2\u80fd\u591f\u540c\u65f6\u6267\u884c\u4f5c\u4e1a\uff0c\u5e76\u80fd\u591f\u6709\u6548\u5730\u5171\u4eab\u96c6\u7fa4\u8d44\u6e90\uff0c\u53ef\u4ee5\u5c06\u67e5\u8be2\u8bbe\u7f6e\u4e3a\u5728\u53e6\u4e00\u4e2a\u8c03\u5ea6\u7a0b\u5e8f\u6c60\u4e2d\u8fd0\u884c\u3002\u4f8b\u5982\uff0c\u53ef\u4ee5\u6309\u7167\u4ee5\u4e0b\u65b9\u5f0f\u8fdb\u884c\u8bbe\u7f6e\u3002<\/p>\n<pre class=\"post-pre\"><code><span class=\"c1\">\/\/ Run streaming query1 in scheduler pool1<\/span>\r\n<span class=\"nv\">spark<\/span><span class=\"o\">.<\/span><span class=\"py\">sparkContext<\/span><span class=\"o\">.<\/span><span class=\"py\">setLocalProperty<\/span><span class=\"o\">(<\/span><span class=\"s\">\"spark.scheduler.pool\"<\/span><span class=\"o\">,<\/span> <span class=\"s\">\"pool1\"<\/span><span class=\"o\">)<\/span>\r\n<span class=\"nv\">df<\/span><span class=\"o\">.<\/span><span class=\"py\">writeStream<\/span><span class=\"o\">.<\/span><span class=\"py\">queryName<\/span><span class=\"o\">(<\/span><span class=\"s\">\"query1\"<\/span><span class=\"o\">).<\/span><span class=\"py\">format<\/span><span class=\"o\">(<\/span><span class=\"s\">\"parquet\"<\/span><span class=\"o\">).<\/span><span class=\"py\">start<\/span><span class=\"o\">(<\/span><span class=\"n\">path1<\/span><span 
class=\"o\">)<\/span>\r\n\r\n<span class=\"c1\">\/\/ Run streaming query2 in scheduler pool2<\/span>\r\n<span class=\"nv\">spark<\/span><span class=\"o\">.<\/span><span class=\"py\">sparkContext<\/span><span class=\"o\">.<\/span><span class=\"py\">setLocalProperty<\/span><span class=\"o\">(<\/span><span class=\"s\">\"spark.scheduler.pool\"<\/span><span class=\"o\">,<\/span> <span class=\"s\">\"pool2\"<\/span><span class=\"o\">)<\/span>\r\n<span class=\"nv\">df<\/span><span class=\"o\">.<\/span><span class=\"py\">writeStream<\/span><span class=\"o\">.<\/span><span class=\"py\">queryName<\/span><span class=\"o\">(<\/span><span class=\"s\">\"query2\"<\/span><span class=\"o\">).<\/span><span class=\"py\">format<\/span><span class=\"o\">(<\/span><span class=\"s\">\"orc\"<\/span><span class=\"o\">).<\/span><span class=\"py\">start<\/span><span class=\"o\">(<\/span><span class=\"n\">path2<\/span><span class=\"o\">)<\/span>\r\n<\/code><\/pre>\n<div>\u8bf7\u6ce8\u610f\uff0c\u672c\u5730\u5c5e\u6027\u8bbe\u7f6e\u5fc5\u987b\u5728\u542f\u52a8\u6d41\u5f0f\u67e5\u8be2\u7684\u540c\u4e00\u4e2a\u7b14\u8bb0\u672c\u5355\u5143\u4e2d\u8fdb\u884c\u3002<\/div>\n<p>\u8bf7\u53c2\u9605Apache\u516c\u5e73\u8c03\u5ea6\u5668\u6587\u6863\u4ee5\u83b7\u53d6\u66f4\u8be6\u7ec6\u7684\u4fe1\u606f\u3002<\/p>\n<h1>\u4f18\u5316\u57fa\u4e8e\u72b6\u6001\u7684\u6d41\u5f0f\u67e5\u8be2\u7684\u6027\u80fd<\/h1>\n<p>\u5982\u679c\u60a8\u5728\u6d41\u5f0f\u67e5\u8be2\u4e2d\u6267\u884c\u6709\u72b6\u6001\u7684\u64cd\u4f5c\uff08\u4f8b\u5982\u6d41\u5f0f\u805a\u5408\u3001\u6d41\u5f0f\u53bb\u91cd\uff08dropDuplicates\uff09\u3001\u6d41\u4e0e\u6d41\u7684\u8fde\u63a5\u3001mapGroupsWithState\u3001flatMapGroupsWithState\uff09\uff0c\u5e76\u4e14\u5e0c\u671b\u4fdd\u7559\u6570\u767e\u4e07\u4e2a\u952e\u7684\u72b6\u6001\uff0c\u5219\u53ef\u80fd\u4f1a\u9047\u5230\u4e00\u4e9b\u95ee\u9898\uff0c\u5982\u7531\u4e8e\u5927\u578bJVM\u5783\u573e\u56de\u6536\uff08GC\uff09\u5bfc\u81f4\u7684\u5fae\u6279\u5904\u7406\u65f6\u95f4\u53d8\u52a8\u7b49\u3002\u8fd9
\u662f\u56e0\u4e3a\u9ed8\u8ba4\u60c5\u51b5\u4e0b\uff0c\u72b6\u6001\u6570\u636e\u88ab\u4fdd\u5b58\u5728\u6267\u884c\u5668\u7684JVM\u5185\u5b58\u4e2d\uff0c\u5927\u91cf\u7684\u72b6\u6001\u5bf9\u8c61\u4f1a\u5bfc\u81f4JVM\u7684\u5185\u5b58\u6d88\u8017\u589e\u52a0\uff0c\u8fdb\u800c\u5bfc\u81f4\u5927\u89c4\u6a21\u7684GC\u6682\u505c\u3002<\/p>\n<p>\u5728\u8fd9\u79cd\u60c5\u51b5\u4e0b\uff0c\u60a8\u53ef\u4ee5\u9009\u62e9\u57fa\u4e8eRocksDB\u7684\u66f4\u4f18\u5316\u7684\u72b6\u6001\u7ba1\u7406\u89e3\u51b3\u65b9\u6848\u3002\u8fd9\u4e2a\u89e3\u51b3\u65b9\u6848\u53ef\u4ee5\u5728Databricks\u8fd0\u884c\u65f6\u4e2d\u4f7f\u7528\u3002\u4e0e\u5c06\u72b6\u6001\u4fdd\u6301\u5728JVM\u5185\u5b58\u4e2d\u76f8\u6bd4\uff0c\u8be5\u89e3\u51b3\u65b9\u6848\u4f7f\u7528RocksDB\u5728\u672c\u5730\u5185\u5b58\u548c\u672c\u5730SSD\u4e0a\u9ad8\u6548\u7ba1\u7406\u72b6\u6001\u3002\u6b64\u5916\uff0c\u5bf9\u4e8e\u6b64\u72b6\u6001\u7684\u4efb\u4f55\u66f4\u6539\u90fd\u4f1a\u81ea\u52a8\u4fdd\u5b58\u5230\u7531\u7ed3\u6784\u5316\u6d41\u6307\u5b9a\u7684\u68c0\u67e5\u70b9\u4f4d\u7f6e\uff0c\u4ece\u800c\u786e\u4fdd\uff08\u4e0e\u9ed8\u8ba4\u72b6\u6001\u7ba1\u7406\u76f8\u540c\uff09\u5b8c\u5168\u7684\u5bb9\u9519\u6027\u3002\u6709\u5173\u5982\u4f55\u5c06RocksDB\u8bbe\u7f6e\u4e3a\u72b6\u6001\u5b58\u50a8\u7684\u65b9\u6cd5\uff0c\u8bf7\u53c2\u9605\u914d\u7f6eRocksDB\u72b6\u6001\u5b58\u50a8\u3002<\/p>\n<p>\u4ee5\u4e0b\u662f\u4e3a\u4e86\u83b7\u5f97\u6700\u4f73\u6027\u80fd\u800c\u63a8\u8350\u7684\u8bbe\u7f6e\u3002<\/p>\n<ul class=\"post-ul\">\n<li style=\"list-style-type: none;\">\n<ul class=\"post-ul\">\u30ef\u30fc\u30ab\u30fc\u3068\u3057\u3066compute-optimized\u30a4\u30f3\u30b9\u30bf\u30f3\u30b9\u3092\u4f7f\u7528\u3057\u307e\u3059\u3002\u4f8b\u3048\u3070\u3001AWS\u306ec3.4xlarge\u30a4\u30f3\u30b9\u30bf\u30f3\u30b9\u306a\u3069\u3067\u3059\u3002<\/ul>\n<\/li>\n<\/ul>\n<p>&nbsp;<\/p>\n<ul 
class=\"post-ul\">\u30af\u30e9\u30b9\u30bf\u30fc\u306b\u304a\u3051\u308b\u30b3\u30a2\u6570\u306e1-2\u500d\u306e\u30b7\u30e3\u30c3\u30d5\u30eb\u30d1\u30fc\u30c6\u30a3\u30b7\u30e7\u30f3\u6570\u3092\u6307\u5b9a\u3057\u307e\u3059\u3002<\/ul>\n<p>\u5728RocksDB\u57fa\u7840\u7684\u72b6\u6001\u7ba1\u7406\u4e2d\uff0c\u5173\u4e8e\u6027\u80fd\u7684\u4f18\u70b9\u662f\u53ef\u4ee5\u7ef4\u6301\u8d85\u8fc7\u9ed8\u8ba4\u503c100\u500d\u4ee5\u4e0a\u7684\u72b6\u6001\u952e\u3002\u4f8b\u5982\uff0c\u5728\u4f7f\u7528AWS c3.4xlarge\u5b9e\u4f8b\u7684Spark\u96c6\u7fa4\u4e2d\u4f5c\u4e3a\u5de5\u4f5c\u8005\uff0c\u4f7f\u7528\u9ed8\u8ba4\u7684\u72b6\u6001\u7ba1\u7406\uff0c\u6bcf\u4e2a\u6267\u884c\u5668\u53ef\u4ee5\u4fdd\u6301\u6700\u591a1-2\u767e\u4e07\u4e2a\u72b6\u6001\u952e\uff0c\u4f46\u4e4b\u540eJVM\u7684\u5783\u573e\u56de\u6536\u4f1a\u5f00\u59cb\u5e76\u5bf9\u6027\u80fd\u4ea7\u751f\u5f71\u54cd\u3002\u7136\u800c\uff0c\u57fa\u4e8eRocksDB\u7684\u72b6\u6001\u7ba1\u7406\u53ef\u4ee5\u8f7b\u677e\u5730\u6bcf\u4e2a\u6267\u884c\u5668\u4fdd\u6301\u4e00\u4ebf\u4e2a\u72b6\u6001\u952e\uff0c\u800c\u4e0d\u4f1a\u9762\u4e34\u5783\u573e\u56de\u6536\u7684\u95ee\u9898\u3002<\/p>\n<div>\u8bf7\u6ce8\u610f\uff0c\u5728\u91cd\u65b0\u542f\u52a8\u67e5\u8be2\u671f\u95f4\u4e0d\u80fd\u66f4\u6539\u72b6\u6001\u7ba1\u7406\u65b9\u6848\u3002\u6362\u53e5\u8bdd\u8bf4\uff0c\u5982\u679c\u60a8\u4f7f\u7528\u9ed8\u8ba4\u7684\u72b6\u6001\u7ba1\u7406\u542f\u52a8\u67e5\u8be2\uff0c\u5219\u9664\u975e\u4f7f\u7528\u65b0\u7684\u68c0\u67e5\u70b9\u5b58\u50a8\u4f4d\u7f6e\u91cd\u65b0\u542f\u52a8\u67e5\u8be2\uff0c\u5426\u5219\u65e0\u6cd5\u66f4\u6539\u3002<\/div>\n<h2>\u8bbe\u7f6eRocksDB\u72b6\u6001\u5b58\u50a8<\/h2>\n<p>\u5728\u5f00\u59cb\u6d41\u67e5\u8be2\u4e4b\u524d\uff0c\u60a8\u53ef\u4ee5\u901a\u8fc7\u5728SparkSession\u4e2d\u8fdb\u884c\u4ee5\u4e0b\u8bbe\u7f6e\u6765\u542f\u7528\u57fa\u4e8eRocksDB\u7684\u72b6\u6001\u7ba1\u7406\u3002<\/p>\n<pre class=\"post-pre\"><code><span class=\"nv\">spark<\/span><span class=\"o\">.<\/span><span 
class=\"py\">conf<\/span><span class=\"o\">.<\/span><span class=\"py\">set<\/span><span class=\"o\">(<\/span>\r\n  <span class=\"s\">\"spark.sql.streaming.stateStore.providerClass\"<\/span><span class=\"o\">,<\/span>\r\n  <span class=\"s\">\"com.databricks.sql.streaming.state.RocksDBStateStoreProvider\"<\/span><span class=\"o\">)<\/span>\r\n<\/code><\/pre>\n<h3>RocksDB\u72b6\u6001\u5b58\u50a8\u7684\u6307\u6807<\/h3>\n<p>\u6bcf\u4e2a\u72b6\u6001\u64cd\u4f5c\u7b26\u5c06\u76d1\u89c6\u72b6\u6001\u5b58\u50a8\u5e76\u6536\u96c6\u4e0e\u5728RocksDB\u5b9e\u4f8b\u4e0a\u8fd0\u884c\u7684\u72b6\u6001\u7ba1\u7406\u64cd\u4f5c\u76f8\u5173\u7684\u5ea6\u91cf\u6807\u51c6\uff0c\u4ee5\u5e2e\u52a9\u8c03\u8bd5\u6162\u901f\u4f5c\u4e1a\u3002\u8fd9\u4e9b\u5ea6\u91cf\u6807\u51c6\u88ab\u805a\u5408\uff08\u603b\u548c\uff09\u5230\u6bcf\u4e2a\u4f5c\u4e1a\u7684\u72b6\u6001\u64cd\u4f5c\u7b26\u4e2d\uff0c\u5728\u6267\u884c\u72b6\u6001\u64cd\u4f5c\u7b26\u7684\u6240\u6709\u4efb\u52a1\u4e2d\u3002\u8fd9\u4e9b\u5ea6\u91cf\u6807\u51c6\u5c06\u6210\u4e3aStreamingQueryProgress\u4e2dstateOperators\u4e2dcustomMetrics\u6620\u5c04\u7684\u4e00\u90e8\u5206\u3002\u4ee5\u4e0b\u662f\u901a\u8fc7\u8c03\u7528StreamingQueryProgress.json()\u83b7\u53d6\u7684JSON\u683c\u5f0f\u7684StreamingQueryProgress\u793a\u4f8b\u3002<\/p>\n<pre class=\"post-pre\"><code><span class=\"p\">{<\/span>\r\n  <span class=\"nl\">\"id\"<\/span> <span class=\"p\">:<\/span> <span class=\"s2\">\"6774075e-8869-454b-ad51-513be86cfd43\"<\/span><span class=\"p\">,<\/span>\r\n  <span class=\"nl\">\"runId\"<\/span> <span class=\"p\">:<\/span> <span class=\"s2\">\"3d08104d-d1d4-4d1a-b21e-0b2e1fb871c5\"<\/span><span class=\"p\">,<\/span>\r\n  <span class=\"nl\">\"batchId\"<\/span> <span class=\"p\">:<\/span> <span class=\"mi\">7<\/span><span class=\"p\">,<\/span>\r\n  <span class=\"nl\">\"stateOperators\"<\/span> <span class=\"p\">:<\/span> <span class=\"p\">[<\/span> <span class=\"p\">{<\/span>\r\n    <span class=\"nl\">\"numRowsTotal\"<\/span> <span 
class=\"p\">:<\/span> <span class=\"mi\">20000000<\/span><span class=\"p\">,<\/span>\r\n    <span class=\"nl\">\"numRowsUpdated\"<\/span> <span class=\"p\">:<\/span> <span class=\"mi\">20000000<\/span><span class=\"p\">,<\/span>\r\n    <span class=\"nl\">\"memoryUsedBytes\"<\/span> <span class=\"p\">:<\/span> <span class=\"mi\">31005397<\/span><span class=\"p\">,<\/span>\r\n    <span class=\"nl\">\"numRowsDroppedByWatermark\"<\/span> <span class=\"p\">:<\/span> <span class=\"mi\">0<\/span><span class=\"p\">,<\/span>\r\n    <span class=\"nl\">\"customMetrics\"<\/span> <span class=\"p\">:<\/span> <span class=\"p\">{<\/span>\r\n      <span class=\"nl\">\"rocksdbBytesCopied\"<\/span> <span class=\"p\">:<\/span> <span class=\"mi\">141037747<\/span><span class=\"p\">,<\/span>\r\n      <span class=\"nl\">\"rocksdbCommitCheckpointLatency\"<\/span> <span class=\"p\">:<\/span> <span class=\"mi\">2<\/span><span class=\"p\">,<\/span>\r\n      <span class=\"nl\">\"rocksdbCommitCompactLatency\"<\/span> <span class=\"p\">:<\/span> <span class=\"mi\">22061<\/span><span class=\"p\">,<\/span>\r\n      <span class=\"nl\">\"rocksdbCommitFileSyncLatencyMs\"<\/span> <span class=\"p\">:<\/span> <span class=\"mi\">1710<\/span><span class=\"p\">,<\/span>\r\n      <span class=\"nl\">\"rocksdbCommitFlushLatency\"<\/span> <span class=\"p\">:<\/span> <span class=\"mi\">19032<\/span><span class=\"p\">,<\/span>\r\n      <span class=\"nl\">\"rocksdbCommitPauseLatency\"<\/span> <span class=\"p\">:<\/span> <span class=\"mi\">0<\/span><span class=\"p\">,<\/span>\r\n      <span class=\"nl\">\"rocksdbCommitWriteBatchLatency\"<\/span> <span class=\"p\">:<\/span> <span class=\"mi\">56155<\/span><span class=\"p\">,<\/span>\r\n      <span class=\"nl\">\"rocksdbFilesCopied\"<\/span> <span class=\"p\">:<\/span> <span class=\"mi\">2<\/span><span class=\"p\">,<\/span>\r\n      <span class=\"nl\">\"rocksdbFilesReused\"<\/span> <span class=\"p\">:<\/span> <span class=\"mi\">0<\/span><span 
class=\"p\">,<\/span>\r\n      <span class=\"nl\">\"rocksdbGetCount\"<\/span> <span class=\"p\">:<\/span> <span class=\"mi\">40000000<\/span><span class=\"p\">,<\/span>\r\n      <span class=\"nl\">\"rocksdbGetLatency\"<\/span> <span class=\"p\">:<\/span> <span class=\"mi\">21834<\/span><span class=\"p\">,<\/span>\r\n      <span class=\"nl\">\"rocksdbPutCount\"<\/span> <span class=\"p\">:<\/span> <span class=\"mi\">1<\/span><span class=\"p\">,<\/span>\r\n      <span class=\"nl\">\"rocksdbPutLatency\"<\/span> <span class=\"p\">:<\/span> <span class=\"mi\">56155599000<\/span><span class=\"p\">,<\/span>\r\n      <span class=\"nl\">\"rocksdbReadBlockCacheHitCount\"<\/span> <span class=\"p\">:<\/span> <span class=\"mi\">1988<\/span><span class=\"p\">,<\/span>\r\n      <span class=\"nl\">\"rocksdbReadBlockCacheMissCount\"<\/span> <span class=\"p\">:<\/span> <span class=\"mi\">40341617<\/span><span class=\"p\">,<\/span>\r\n      <span class=\"nl\">\"rocksdbSstFileSize\"<\/span> <span class=\"p\">:<\/span> <span class=\"mi\">141037747<\/span><span class=\"p\">,<\/span>\r\n      <span class=\"nl\">\"rocksdbTotalBytesReadByCompaction\"<\/span> <span class=\"p\">:<\/span> <span class=\"mi\">336853375<\/span><span class=\"p\">,<\/span>\r\n      <span class=\"nl\">\"rocksdbTotalBytesReadByGet\"<\/span> <span class=\"p\">:<\/span> <span class=\"mi\">680000000<\/span><span class=\"p\">,<\/span>\r\n      <span class=\"nl\">\"rocksdbTotalBytesReadThroughIterator\"<\/span> <span class=\"p\">:<\/span> <span class=\"mi\">0<\/span><span class=\"p\">,<\/span>\r\n      <span class=\"nl\">\"rocksdbTotalBytesWrittenByCompaction\"<\/span> <span class=\"p\">:<\/span> <span class=\"mi\">141037747<\/span><span class=\"p\">,<\/span>\r\n      <span class=\"nl\">\"rocksdbTotalBytesWrittenByPut\"<\/span> <span class=\"p\">:<\/span> <span class=\"mi\">740000012<\/span><span class=\"p\">,<\/span>\r\n      <span class=\"nl\">\"rocksdbTotalCompactionLatencyMs\"<\/span> <span class=\"p\">:<\/span> <span 
class=\"mi\">21949695000<\/span><span class=\"p\">,<\/span>\r\n      <span class=\"nl\">\"rocksdbWriterStallLatencyMs\"<\/span> <span class=\"p\">:<\/span> <span class=\"mi\">0<\/span><span class=\"p\">,<\/span>\r\n      <span class=\"nl\">\"rocksdbZipFileBytesUncompressed\"<\/span> <span class=\"p\">:<\/span> <span class=\"mi\">7038<\/span>\r\n    <span class=\"p\">}<\/span>\r\n  <span class=\"p\">}<\/span> <span class=\"p\">],<\/span>\r\n  <span class=\"nl\">\"sources\"<\/span> <span class=\"p\">:<\/span> <span class=\"p\">[<\/span> <span class=\"p\">{<\/span>\r\n  <span class=\"p\">}<\/span> <span class=\"p\">],<\/span>\r\n  <span class=\"nl\">\"sink\"<\/span> <span class=\"p\">:<\/span> <span class=\"p\">{<\/span>\r\n  <span class=\"p\">}<\/span>\r\n<span class=\"p\">}<\/span>\r\n<\/code><\/pre>\n<p>\u8bf7\u53c2\u8003\u539f\u6587\u4ee5\u83b7\u53d6\u6709\u5173\u5ea6\u91cf\u6307\u6807\u7684\u8be6\u7ec6\u8bf4\u660e\u3002<\/p>\n<h2>\u521b\u5efa\u975e\u540c\u6b65\u72b6\u6001\u68c0\u67e5\u70b9<\/h2>\n<div>\u6ce8\u610f\uff1a\u53ea\u80fd\u5728Databricks\u8fd0\u884c\u65f610.3\u53ca\u4ee5\u4e0a\u7248\u672c\u4e2d\u4f7f\u7528\u8be5\u529f\u80fd\u3002<\/div>\n<p>\u901a\u8fc7\u542f\u7528\u5f02\u6b65\u72b6\u6001\u68c0\u67e5\u70b9\uff0c\u5728\u6d89\u53ca\u5927\u89c4\u6a21\u72b6\u6001\u66f4\u65b0\u7684\u6709\u72b6\u6001\u6d41\u5f0f\u67e5\u8be2\u4e2d\uff0c\u53ef\u80fd\u4f1a\u51cf\u5c11\u7aef\u5230\u7aef\u5fae\u6279\u5904\u7406\u7684\u5ef6\u8fdf\u3002<\/p>\n<div><img decoding=\"async\" class=\"post-images\" title=\"\" src=\"https:\/\/cdn.silicloud.com\/blog-img\/blog\/img\/657d7ca7913a08637a69d0f4\/59-0.png\" alt=\"\" 
\/><\/div>\n<p>\u975e\u540c\u6b65\u578b\u7684\u72b6\u6001\u68c0\u67e5\u70b9\u4f1a\u5c1d\u8bd5\u5f02\u6b65\u5730\u521b\u5efa\u68c0\u67e5\u70b9\uff0c\u4ee5\u514d\u5fae\u6279\u5904\u7406\u6267\u884c\u9700\u8981\u7b49\u5f85\u68c0\u67e5\u70b9\u521b\u5efa\u5b8c\u6210\u3002\u6362\u53e5\u8bdd\u8bf4\uff0c\u4e0b\u4e00\u4e2a\u5fae\u6279\u5904\u7406\u4f1a\u5728\u4e0a\u4e00\u4e2a\u5fae\u6279\u5904\u7406\u5b8c\u6210\u540e\u7acb\u5373\u542f\u52a8\u3002\u7136\u800c\uff0c\u5728\u5185\u90e8\uff0c\u504f\u79fb\u91cf\u5143\u6570\u636e\uff08\u4fdd\u5b58\u5728\u68c0\u67e5\u70b9\u7684\u4f4d\u7f6e\uff09\u4f1a\u8ddf\u8e2a\u6bcf\u4e2a\u5fae\u6279\u5904\u7406\u7684\u72b6\u6001\u68c0\u67e5\u70b9\u662f\u5426\u5df2\u5b8c\u6210\u3002\u5982\u679c\u91cd\u65b0\u542f\u52a8\u67e5\u8be2\uff0c\u5219\u53ef\u80fd\u9700\u8981\u91cd\u65b0\u6267\u884c\u4e00\u4e2a\u6216\u591a\u4e2a\u5fae\u6279\u5904\u7406\u3002\u8fd9\u53ef\u80fd\u5305\u62ec\u5c1a\u672a\u5b8c\u6210\u8ba1\u7b97\u7684\u6700\u65b0\u5fae\u6279\u5904\u7406\u4ee5\u53ca\u5c1a\u672a\u5b8c\u6210\u72b6\u6001\u68c0\u67e5\u70b9\u7684\u5fae\u6279\u5904\u7406\u3002\u56e0\u6b64\uff0c\u60a8\u53ef\u4ee5\u83b7\u5f97\u4e0e\u540c\u6b65\u68c0\u67e5\u70b9\u521b\u5efa\u76f8\u540c\u7684\u6545\u969c\u5bb9\u9519\u4fdd\u8bc1\uff08\u5373\u5177\u6709\u5e42\u7b49\u6027\u7684\u6d88\u8d39\u8005\u5e76\u786e\u4fdd\u4ec5\u4e00\u6b21\u7684\u4fdd\u8bc1\uff09\u3002<\/p>\n<p>\u7b80\u8a00\u4e4b\uff0c\u901a\u8fc7\u5728\u5b58\u5728\u72b6\u6001\u66f4\u65b0\u74f6\u9888\u7684\u72b6\u6001\u4fdd\u6301\u578b\u6d41\u5f0f\u67e5\u8be2\u4e2d\u542f\u7528\u5f02\u6b65\u68c0\u67e5\u70b9\u521b\u5efa\uff0c\u53ef\u4ee5\u5728\u51cf\u5c11\u7aef\u5230\u7aef\u5ef6\u8fdf\u7684\u540c\u65f6\uff0c\u4fdd\u6301\u6240\u6709\u5bb9\u9519\u6027\u4fdd\u8bc1\uff0c\u800c\u65e0\u9700\u727a\u7272\u4efb\u4f55\u53ef\u9760\u6027\u3002<\/p>\n<h3>\u786e\u5b9a\u76ee\u6807\u5de5\u4f5c\u8d1f\u8f7d<\/h3>\n<p>\u4ee5\u4e0b\u662f\u901a\u8fc7\u521b\u5efa\u5f02\u6b65\u68c0\u67e5\u70b9\u800c\u53ef\u80fd\u4eab\u53d7\u5230\u4f18\u5
2bf\u7684\u6d41\u5f0f\u4f5c\u4e1a\u7684\u7279\u6027\u3002<\/p>\n<p>\u30b8\u30e7\u30d6\u306b1\u3064\u4ee5\u4e0a\u306e\u30b9\u30c6\u30fc\u30c8\u30d5\u30eb\u306a\u30aa\u30da\u30ec\u30fc\u30b7\u30e7\u30f3(\u4f8b\u3048\u3070\u3001\u96c6\u8a08\u3001[flat]MapGroupsWithState\u3001\u30b9\u30c8\u30ea\u30fc\u30e0\u3068\u30b9\u30c8\u30ea\u30fc\u30e0\u306ejoin)\u304c\u542b\u307e\u308c\u3066\u3044\u308b\u3002<\/p>\n<p>\u30b9\u30c6\u30fc\u30c8\u306e\u30c1\u30a7\u30c3\u30af\u30dd\u30a4\u30f3\u30c8\u306e\u30ec\u30fc\u30c6\u30f3\u30b7\u30fc\u304c\u30d0\u30c3\u30c1\u51e6\u7406\u5168\u4f53\u306e\u30ec\u30fc\u30c6\u30f3\u30b7\u30fc\u306e\u5927\u90e8\u5206\u3092\u5360\u3081\u308b\u3002\u3053\u306e\u60c5\u5831\u306fStreamingQueryProgress\u3067\u78ba\u8a8d\u3059\u308b\u3053\u3068\u304c\u3067\u304d\u307e\u3059\u3002\u3053\u308c\u3089\u306e\u30a4\u30d9\u30f3\u30c8\u306fSpark\u30c9\u30e9\u30a4\u30d0\u30fc\u306elog4j\u30ed\u30b0\u3067\u3082\u78ba\u8a8d\u3059\u308b\u3053\u3068\u304c\u3067\u304d\u307e\u3059\u3002\u4ee5\u4e0b\u306b\u30b9\u30c8\u30ea\u30fc\u30df\u30f3\u30b0\u30af\u30a8\u30ea\u30fc\u306e\u9032\u6357\u72b6\u6cc1\u3068\u3001\u5168\u4f53\u7684\u306a\u30d0\u30c3\u30c1\u5b9f\u884c\u306e\u30ec\u30fc\u30c6\u30f3\u30b7\u30fc\u306b\u304a\u3051\u308b\u30b9\u30c6\u30fc\u30c8\u30c1\u30a7\u30c3\u30af\u30dd\u30a4\u30f3\u30c8\u306e\u30a4\u30f3\u30d1\u30af\u30c8\u3092\u3069\u306e\u3088\u3046\u306b\u7279\u5b9a\u3059\u308b\u306e\u304b\u306e\u4f8b\u3092\u793a\u3057\u307e\u3059\u3002<\/p>\n<p>JSON<br \/>\n{<br \/>\n&#8220;id&#8221; : &#8220;2e3495a2-de2c-4a6a-9a8e-f6d4c4796f19&#8221;,<br \/>\n&#8220;runId&#8221; : &#8220;e36e9d7e-d2b1-4a43-b0b3-e875e767e1fe&#8221;,<br \/>\n&#8220;&#8230;&#8221;,<br \/>\n&#8220;batchId&#8221; : 0,<br \/>\n&#8220;durationMs&#8221; : {<br \/>\n&#8220;&#8230;&#8221;,<br \/>\n&#8220;triggerExecution&#8221; : 547730,<br \/>\n&#8220;&#8230;&#8221;<br \/>\n},<br \/>\n&#8220;stateOperators&#8221; : [ {<br \/>\n&#8220;&#8230;&#8221;,<br \/>\n&#8220;commitTimeMs&#8221; : 
3186626,<br \/>\n&#8220;numShufflePartitions&#8221; : 64,<br \/>\n&#8220;&#8230;&#8221;<br \/>\n}]<br \/>\n}<\/p>\n<p>\u4e0a\u8a18\u306e\u30af\u30a8\u30ea\u30fc\u9032\u6357\u30a4\u30d9\u30f3\u30c8\u306e\u30b9\u30c6\u30fc\u30c8\u30c1\u30a7\u30c3\u30af\u30dd\u30a4\u30f3\u30c8\u306e\u30ec\u30fc\u30c6\u30f3\u30b7\u30fc\u306e\u5206\u6790<\/p>\n<p>\u30d0\u30c3\u30c1\u671f\u9593(durationMs.triggerExecution)\u306f\u7d04547\u79d2\u3002<br \/>\n\u30b9\u30c6\u30fc\u30c8\u30b9\u30c8\u30a2\u306e\u30b3\u30df\u30c3\u30c8\u306e\u30ec\u30fc\u30c6\u30f3\u30b7\u30fc(stateOperators[0].commitTimeMs)\u306f\u7d043,186\u79d2\u3002\u30b3\u30df\u30c3\u30c8\u306e\u30ec\u30fc\u30c6\u30f3\u30b7\u30fc\u306f\u30b9\u30c6\u30fc\u30c8\u30b9\u30c8\u30a2\u3092\u6301\u3064\u30bf\u30b9\u30af\u3067\u5408\u8a08\u3055\u308c\u307e\u3059\u3002\u3053\u306e\u5834\u5408\u3001\u305d\u306e\u3088\u3046\u306a\u30bf\u30b9\u30af\u306f64\u500b(stateOperators[0].numShufflePartitions)\u3067\u3059\u3002<br \/>\n\u30b9\u30c6\u30fc\u30c8\u30aa\u30da\u30ec\u30fc\u30bf\u3092\u6301\u3064\u305d\u308c\u305e\u308c\u306e\u30bf\u30b9\u30af\u306f\u3001\u30c1\u30a7\u30c3\u30af\u30dd\u30a4\u30f3\u30c8\u4f5c\u6210\u306b\u5e73\u574750\u79d2(3,186\/64)\u304b\u304b\u3063\u3066\u3044\u307e\u3059\u3002\u3053\u308c\u306f\u8ffd\u52a0\u306e\u30ec\u30fc\u30c6\u30f3\u30b7\u30fc\u3067\u3042\u308a\u3001\u30d0\u30c3\u30c1\u671f\u9593\u306b\u52a0\u7b97\u3055\u308c\u307e\u3059\u300264\u500b\u3059\u3079\u3066\u306e\u30bf\u30b9\u30af\u304c\u540c\u6642\u306b\u5b9f\u884c\u3059\u308b\u3068\u4eee\u5b9a\u3059\u308b\u3068\u3001\u30c1\u30a7\u30c3\u30af\u30dd\u30a4\u30f3\u30c8\u306e\u30b9\u30c6\u30c3\u30d7\u306f\u30d0\u30c3\u30c1\u51e6\u7406\u671f\u9593\u306e\u7d049%(50\u79d2 \/ 
547\u79d2)\u3092\u5360\u3081\u3066\u3044\u307e\u3059\u3002\u6700\u5927\u540c\u6642\u30bf\u30b9\u30af\u5b9f\u884c\u6570\u304c64\u3088\u308a\u5c11\u306a\u304f\u306a\u308b\u3068\u3001\u3053\u306e\u30d1\u30fc\u30bb\u30f3\u30c6\u30fc\u30b8\u306f\u3055\u3089\u306b\u5897\u52a0\u3057\u307e\u3059\u3002<\/p>\n<h3>\u542f\u7528\u975e\u540c\u6b65\u72b6\u6001\u68c0\u67e5\u70b9\u521b\u5efa<\/h3>\n<p>\u5728\u6d41\u5a92\u4f53\u4f5c\u4e1a\u4e2d\u8fdb\u884c\u4ee5\u4e0b\u8bbe\u7f6e\u3002\u8981\u8fdb\u884c\u5f02\u6b65\u72b6\u6001\u68c0\u67e5\u70b9\u521b\u5efa\uff0c\u9700\u8981\u4f7f\u7528\u652f\u6301\u5f02\u6b65\u63d0\u4ea4\u7684\u72b6\u6001\u5b58\u50a8\u5b9e\u73b0\u3002\u76ee\u524d\u4ec5\u652f\u6301\u57fa\u4e8eRocksDB\u7684\u72b6\u6001\u5b58\u50a8\u3002<\/p>\n<pre class=\"post-pre\"><code><span class=\"nv\">spark<\/span><span class=\"o\">.<\/span><span class=\"py\">conf<\/span><span class=\"o\">.<\/span><span class=\"py\">set<\/span><span class=\"o\">(<\/span>\r\n  <span class=\"s\">\"spark.databricks.streaming.statefulOperator.asyncCheckpoint.enabled\"<\/span><span class=\"o\">,<\/span>\r\n  <span class=\"s\">\"true\"<\/span>\r\n<span class=\"o\">)<\/span>\r\n\r\n<span class=\"nv\">spark<\/span><span class=\"o\">.<\/span><span class=\"py\">conf<\/span><span class=\"o\">.<\/span><span class=\"py\">set<\/span><span class=\"o\">(<\/span>\r\n  <span class=\"s\">\"spark.sql.streaming.stateStore.providerClass\"<\/span><span class=\"o\">,<\/span>\r\n  <span class=\"s\">\"com.databricks.sql.streaming.state.RocksDBStateStoreProvider\"<\/span>\r\n<span class=\"o\">)<\/span>\r\n<\/code><\/pre>\n<h3>\u9650\u5236<\/h3>\n<ul class=\"post-ul\">\n<li style=\"list-style-type: none;\">\n<ul 
class=\"post-ul\">\u975e\u540c\u671f\u30c1\u30a7\u30c3\u30af\u30dd\u30a4\u30f3\u30c8\u306b\u304a\u3051\u308b\u3042\u3089\u3086\u308b\u5931\u6557\u306f\u3001\u30af\u30a8\u30ea\u30fc\u81ea\u4f53\u3092\u5931\u6557\u3055\u305b\u307e\u3059\u3002\u540c\u671f\u30c1\u30a7\u30c3\u30af\u30dd\u30a4\u30f3\u30c8\u4f5c\u6210\u30e2\u30fc\u30c9\u3067\u306f\u3001\u30c1\u30a7\u30c3\u30af\u30dd\u30a4\u30f3\u30c8\u306f\u30bf\u30b9\u30af\u306e\u4e00\u90e8\u3068\u3057\u3066\u5b9f\u884c\u3055\u308c\u3001Spark\u306f\u30af\u30a8\u30ea\u30fc\u304c\u5931\u6557\u3059\u308b\u524d\u306b\u30bf\u30b9\u30af\u3092\u8907\u6570\u56de\u30ea\u30c8\u30e9\u30a4\u3057\u307e\u3059\u3002\u3053\u306e\u6a5f\u69cb\u306f\u975e\u540c\u671f\u30c1\u30a7\u30c3\u30af\u30dd\u30a4\u30f3\u30c8\u4f5c\u6210\u30e2\u30fc\u30c9\u3067\u306f\u5b58\u5728\u3057\u307e\u305b\u3093\u3002\u3057\u304b\u3057\u3001Databricks\u30b8\u30e7\u30d6\u306e\u30ea\u30c8\u30e9\u30a4\u3092\u4f7f\u3046\u3053\u3068\u3067\u3001\u305d\u306e\u3088\u3046\u306a\u51e6\u7406\u5931\u6557\u306b\u5bfe\u3057\u3066\u81ea\u52d5\u3067\u30ea\u30c8\u30e9\u30a4\u3059\u308b\u3088\u3046\u306b\u3059\u308b\u3053\u3068\u304c\u3067\u304d\u307e\u3059\u3002<\/ul>\n<\/li>\n<\/ul>\n<p>&nbsp;<\/p>\n<ul class=\"post-ul\">\n<li style=\"list-style-type: none;\">\n<ul 
class=\"post-ul\">\u975e\u540c\u671f\u30b9\u30c6\u30fc\u30c8\u30c1\u30a7\u30c3\u30af\u30dd\u30a4\u30f3\u30c8\u4f5c\u6210\u3068\u30aa\u30fc\u30c8\u30b9\u30b1\u30fc\u30ea\u30f3\u30b0\u306e\u7d44\u307f\u5408\u308f\u305b\u306f\u52d5\u4f5c\u3057\u307e\u305b\u3093\u3002\u30de\u30a4\u30af\u30ed\u30d0\u30c3\u30c1\u306e\u5b9f\u884c\u306e\u5408\u9593\u306b\u30b9\u30c6\u30fc\u30c8\u30b9\u30c8\u30a2\u306e\u5834\u6240\u304c\u5909\u66f4\u3055\u308c\u306a\u3044\u5834\u5408\u306b\u306f\u3001\u975e\u540c\u671f\u30c1\u30a7\u30c3\u30af\u30dd\u30a4\u30f3\u30c8\u4f5c\u6210\u304c\u3082\u3063\u3068\u3082\u3046\u307e\u304f\u52d5\u4f5c\u3057\u307e\u3059\u3002\u30aa\u30fc\u30c8\u30b9\u30b1\u30fc\u30ea\u30f3\u30b0\u3092\u6709\u52b9\u5316\u3059\u308b\u3068\u3001\u30aa\u30fc\u30c8\u30b9\u30b1\u30fc\u30ea\u30f3\u30b0\u306e\u4e00\u90e8\u3067\u30ce\u30fc\u30c9\u304c\u8ffd\u52a0\u3001\u524a\u9664\u3055\u308c\u308b\u305f\u3073\u306b\u3001\u30b9\u30c6\u30fc\u30c8\u30b9\u30c8\u30a2\u306e\u30a4\u30f3\u30b9\u30bf\u30f3\u30b9\u304c\u518d\u5206\u6563\u3055\u308c\u308b\u5834\u5408\u304c\u3042\u308a\u307e\u3059\u3002<\/ul>\n<\/li>\n<\/ul>\n<p>&nbsp;<\/p>\n<ul 
class=\"post-ul\">\u975e\u540c\u671f\u30b9\u30c6\u30fc\u30c8\u30c1\u30a7\u30c3\u30af\u30dd\u30a4\u30f3\u30c8\u4f5c\u6210\u306fRocksDB\u30b9\u30c6\u30fc\u30c8\u30b9\u30c8\u30a2\u30d7\u30ed\u30d0\u30a4\u30c0\u30fc\u5b9f\u88c5\u3067\u306e\u307f\u30b5\u30dd\u30fc\u30c8\u3055\u308c\u3066\u3044\u307e\u3059\u3002\u30c7\u30d5\u30a9\u30eb\u30c8\u306e\u30a4\u30f3\u30e1\u30e2\u30ea\u30b9\u30c6\u30fc\u30c8\u30b9\u30c8\u30a2\u5b9f\u88c5\u306f\u975e\u540c\u671f\u30b9\u30c6\u30fc\u30c8\u30c1\u30a7\u30c3\u30af\u30dd\u30a4\u30f3\u30c8\u4f5c\u6210\u3092\u30b5\u30dd\u30fc\u30c8\u3057\u3066\u3044\u307e\u305b\u3093\u3002<\/ul>\n<h1>\u591a\u79cd\u6c34\u5370\u7b56\u7565<\/h1>\n<p>\u5728\u6d41\u5f0f\u67e5\u8be2\u4e2d\uff0c\u53ef\u4ee5\u5305\u542b\u591a\u4e2a\u8f93\u5165\u6d41\u8fdb\u884cunion\u548cjoin\u64cd\u4f5c\u3002\u6bcf\u4e2a\u8f93\u5165\u6d41\u90fd\u53ef\u4ee5\u4f7f\u7528withWatermark(&#8220;eventTime&#8221;\uff0cdelay)\u7684\u72b6\u6001\u64cd\u4f5c\u6765\u6307\u5b9a\u4e0d\u540c\u7684\u5ef6\u8fdf\u6570\u636e\u9608\u503c\u3002\u53ef\u4ee5\u901a\u8fc7\u4f7f\u7528withWatermark(&#8220;eventTime&#8221;\uff0cdelay)\u5728\u6bcf\u4e2a\u8f93\u5165\u6d41\u4e0a\u6765\u6307\u5b9a\u8fd9\u4e9b\u9608\u503c\u3002\u4f8b\u5982\uff0c\u8003\u8651\u4e00\u4e2a\u6d41\u4e0e\u6d41\u7684join\u67e5\u8be2\u3002<\/p>\n<pre class=\"post-pre\"><code><span class=\"k\">val<\/span> <span class=\"nv\">inputStream1<\/span> <span class=\"k\">=<\/span> <span class=\"o\">...<\/span>      <span class=\"c1\">\/\/ delays up to 1 hour<\/span>\r\n<span class=\"k\">val<\/span> <span class=\"nv\">inputStream2<\/span> <span class=\"k\">=<\/span> <span class=\"o\">...<\/span>      <span class=\"c1\">\/\/ delays up to 2 hours<\/span>\r\n\r\n<span class=\"nv\">inputStream1<\/span><span class=\"o\">.<\/span><span class=\"py\">withWatermark<\/span><span class=\"o\">(<\/span><span class=\"s\">\"eventTime1\"<\/span><span class=\"o\">,<\/span> <span class=\"s\">\"1 hour\"<\/span><span class=\"o\">)<\/span>\r\n  <span 
class=\"o\">.<\/span><span class=\"py\">join<\/span><span class=\"o\">(<\/span>\r\n    <span class=\"nv\">inputStream2<\/span><span class=\"o\">.<\/span><span class=\"py\">withWatermark<\/span><span class=\"o\">(<\/span><span class=\"s\">\"eventTime2\"<\/span><span class=\"o\">,<\/span> <span class=\"s\">\"2 hours\"<\/span><span class=\"o\">),<\/span>\r\n    <span class=\"n\">joinCondition<\/span><span class=\"o\">)<\/span>\r\n<\/code><\/pre>\n<p>\u5728\u6267\u884c\u67e5\u8be2\u65f6\uff0c\u7ed3\u6784\u5316\u6d41\u4f1a\u5355\u72ec\u8ddf\u8e2a\u6bcf\u4e2a\u8f93\u5165\u6d41\u7684\u6700\u5927\u4e8b\u4ef6\u65f6\u95f4\uff0c\u5e76\u6839\u636e\u76f8\u5e94\u7684\u5ef6\u8fdf\u8ba1\u7b97\u6c34\u5370\uff0c\u7136\u540e\u9009\u62e9\u7528\u4e8e\u72b6\u6001\u64cd\u4f5c\u7684\u5355\u4e2a\u5168\u5c40\u6c34\u5370\u3002\u9ed8\u8ba4\u60c5\u51b5\u4e0b\uff0c\u4e3a\u4e86\u9632\u6b62\u51fa\u73b0\u610f\u5916\u7684\u6570\u636e\u4e22\u5931\uff0c\u4f7f\u5f97\u67d0\u4e2a\u6d41\u7531\u4e8e\u5ef6\u8fdf\u800c\u6bd4\u5176\u4ed6\u6d41\u6ede\u540e\uff0c\u6700\u5c0f\u503c\u88ab\u7528\u4f5c\u5168\u5c40\u6c34\u5370\uff08\u4f8b\u5982\uff0c\u67d0\u4e2a\u6d41\u7531\u4e8e\u4e0a\u6e38\u6d41\u7684\u6545\u969c\u800c\u505c\u6b62\u63a5\u6536\u6570\u636e\uff09\u3002\u6362\u53e5\u8bdd\u8bf4\uff0c\u5168\u5c40\u6c34\u5370\u53ef\u4ee5\u5b89\u5168\u5730\u4e0e\u6700\u6162\u7684\u6d41\u540c\u6b65\uff0c\u4ece\u800c\u4f7f\u67e5\u8be2\u8f93\u51fa\u4e5f\u76f8\u5e94\u5730\u5ef6\u8fdf\u3002<\/p>\n<p>\u5728\u67d0\u4e9b\u60c5\u51b5\u4e0b\uff0c\u5373\u4f7f\u4ece\u6700\u6162\u7684\u6570\u636e\u6d41\u4e2d\u4e22\u5f03\u6570\u636e\uff0c\u6211\u4eec\u53ef\u80fd\u4ecd\u7136\u5e0c\u671b\u66f4\u5feb\u5730\u83b7\u5f97\u7ed3\u679c\u3002\u901a\u8fc7\u5c06SQL\u8bbe\u7f6espark.sql.streaming.multipleWatermarkPolicy\u8bbe\u7f6e\u4e3amax\uff08\u9ed8\u8ba4\u4e3amin\uff09\uff0c\u53ef\u4ee5\u8bbe\u7f6e\u591a\u4e2a\u6c34\u5370\u7b56\u7565\u4ee5\u9009\u62e9\u5168\u5c40\u6c34\u5370\u7684\u6700\u5927\u503c\u3002\u8fd9\u6837\uff0c\u5168\u5c40\u6c34\u537
0\u5c06\u4e0e\u6700\u5feb\u7684\u6570\u636e\u6d41\u540c\u6b65\u3002\u7136\u800c\uff0c\u4f5c\u4e3a\u526f\u4f5c\u7528\uff0c\u6765\u81ea\u6700\u6162\u6570\u636e\u6d41\u7684\u6570\u636e\u5c06\u88ab\u79ef\u6781\u4e22\u5f03\u3002\u56e0\u6b64\uff0c\u5efa\u8bae\u5728\u8003\u8651\u540e\u4f7f\u7528\u6b64\u8bbe\u7f6e\u3002<\/p>\n<h1>\u53ef\u89c6\u5316\u7ed3\u6784\u5316\u6d41\u6570\u636e\u6846\u67b6<\/h1>\n<p>\u4e3a\u4e86\u5b9e\u65f6\u53ef\u89c6\u5316\u7ed3\u6784\u5316\u6d41\u5f0f\u6570\u636e\u6846\uff0c\u60a8\u53ef\u4ee5\u4f7f\u7528display\u51fd\u6570\u3002trigger\u548ccheckpointLocation\u53c2\u6570\u662f\u53ef\u9009\u7684\uff0c\u4f46\u4f5c\u4e3a\u6700\u4f73\u5b9e\u8df5\uff0c\u5728\u751f\u4ea7\u73af\u5883\u4e2d\u5efa\u8bae\u59cb\u7ec8\u6307\u5b9a\u5b83\u4eec\u3002<\/p>\n<pre class=\"post-pre\"><code><span class=\"k\">import<\/span> <span class=\"nn\">org.apache.spark.sql.streaming.Trigger<\/span>\r\n\r\n<span class=\"k\">val<\/span> <span class=\"nv\">streaming_df<\/span> <span class=\"k\">=<\/span> <span class=\"nv\">spark<\/span><span class=\"o\">.<\/span><span class=\"py\">readStream<\/span><span class=\"o\">.<\/span><span class=\"py\">format<\/span><span class=\"o\">(<\/span><span class=\"s\">\"rate\"<\/span><span class=\"o\">).<\/span><span class=\"py\">load<\/span><span class=\"o\">()<\/span>\r\n<span class=\"nf\">display<\/span><span class=\"o\">(<\/span><span class=\"nv\">streaming_df<\/span><span class=\"o\">.<\/span><span class=\"py\">groupBy<\/span><span class=\"o\">().<\/span><span class=\"py\">count<\/span><span class=\"o\">(),<\/span> <span class=\"n\">trigger<\/span> <span class=\"k\">=<\/span> <span class=\"nv\">Trigger<\/span><span class=\"o\">.<\/span><span class=\"py\">ProcessingTime<\/span><span class=\"o\">(<\/span><span class=\"s\">\"5 seconds\"<\/span><span class=\"o\">),<\/span> <span class=\"n\">checkpointLocation<\/span> <span class=\"k\">=<\/span> <span class=\"s\">\"dbfs:\/&lt;checkpoint-path&gt;\"<\/span><span 
class=\"o\">)<\/span>\r\n<\/code><\/pre>\n<pre class=\"post-pre\"><code><span class=\"n\">streaming_df<\/span> <span class=\"o\">=<\/span> <span class=\"n\">spark<\/span><span class=\"p\">.<\/span><span class=\"n\">readStream<\/span><span class=\"p\">.<\/span><span class=\"nf\">format<\/span><span class=\"p\">(<\/span><span class=\"sh\">\"<\/span><span class=\"s\">rate<\/span><span class=\"sh\">\"<\/span><span class=\"p\">).<\/span><span class=\"nf\">load<\/span><span class=\"p\">()<\/span>\r\n<span class=\"nf\">display<\/span><span class=\"p\">(<\/span><span class=\"n\">streaming_df<\/span><span class=\"p\">.<\/span><span class=\"nf\">groupBy<\/span><span class=\"p\">().<\/span><span class=\"nf\">count<\/span><span class=\"p\">(),<\/span> <span class=\"n\">processingTime<\/span> <span class=\"o\">=<\/span> <span class=\"sh\">\"<\/span><span class=\"s\">5 seconds<\/span><span class=\"sh\">\"<\/span><span class=\"p\">,<\/span> <span class=\"n\">checkpointLocation<\/span> <span class=\"o\">=<\/span> <span class=\"sh\">\"<\/span><span class=\"s\">dbfs:\/&lt;checkpoint-path&gt;<\/span><span class=\"sh\">\"<\/span><span class=\"p\">)<\/span>\r\n<\/code><\/pre>\n<p>\u8bf7\u53c2\u8003\u6709\u5173Structured Streaming DataFrames\u7684\u8be6\u7ec6\u4fe1\u606f\u3002<\/p>\n<h1>\u6539\u5584flatMapGroupsWithState\u65b9\u6cd5\u7684State Operator<\/h1>\n<h2>\u521d\u59cb\u72b6\u6001\u7684\u8bbe\u5b9a<\/h2>\n<p>\u60a8\u53ef\u4ee5\u4f7f\u7528[flat]MapGroupsWithState\u64cd\u4f5c\u7b26\u6765\u6307\u5b9a\u7528\u6237\u5b9a\u4e49\u7684\u521d\u59cb\u72b6\u6001\uff0c\u7528\u4e8e\u7ed3\u6784\u5316\u6d41\u5904\u7406\u4e2d\u7684\u72b6\u6001\u5904\u7406\u3002<\/p>\n<pre class=\"post-pre\"><code><span class=\"k\">def<\/span> <span class=\"nf\">mapGroupsWithState<\/span><span class=\"o\">[<\/span><span class=\"kt\">S:<\/span> <span class=\"kt\">Encoder<\/span>, <span class=\"kt\">U:<\/span> <span class=\"kt\">Encoder<\/span><span class=\"o\">](<\/span>\r\n    <span 
class=\"n\">timeoutConf<\/span><span class=\"k\">:<\/span> <span class=\"kt\">GroupStateTimeout<\/span><span class=\"o\">,<\/span>\r\n    <span class=\"n\">initialState<\/span><span class=\"k\">:<\/span> <span class=\"kt\">KeyValueGroupedDataset<\/span><span class=\"o\">[<\/span><span class=\"kt\">K<\/span>, <span class=\"kt\">S<\/span><span class=\"o\">])(<\/span>\r\n    <span class=\"n\">func<\/span><span class=\"k\">:<\/span> <span class=\"o\">(<\/span><span class=\"kt\">K<\/span><span class=\"o\">,<\/span> <span class=\"kt\">Iterator<\/span><span class=\"o\">[<\/span><span class=\"kt\">V<\/span><span class=\"o\">],<\/span> <span class=\"nc\">GroupState<\/span><span class=\"o\">[<\/span><span class=\"kt\">S<\/span><span class=\"o\">])<\/span> <span class=\"k\">=&gt;<\/span> <span class=\"n\">U<\/span><span class=\"o\">)<\/span><span class=\"k\">:<\/span> <span class=\"kt\">Dataset<\/span><span class=\"o\">[<\/span><span class=\"kt\">U<\/span><span class=\"o\">]<\/span>\r\n\r\n<span class=\"k\">def<\/span> <span class=\"nf\">flatMapGroupsWithState<\/span><span class=\"o\">[<\/span><span class=\"kt\">S:<\/span> <span class=\"kt\">Encoder<\/span>, <span class=\"kt\">U:<\/span> <span class=\"kt\">Encoder<\/span><span class=\"o\">](<\/span>\r\n    <span class=\"n\">outputMode<\/span><span class=\"k\">:<\/span> <span class=\"kt\">OutputMode<\/span><span class=\"o\">,<\/span>\r\n    <span class=\"n\">timeoutConf<\/span><span class=\"k\">:<\/span> <span class=\"kt\">GroupStateTimeout<\/span><span class=\"o\">,<\/span>\r\n    <span class=\"n\">initialState<\/span><span class=\"k\">:<\/span> <span class=\"kt\">KeyValueGroupedDataset<\/span><span class=\"o\">[<\/span><span class=\"kt\">K<\/span>, <span class=\"kt\">S<\/span><span class=\"o\">])(<\/span>\r\n    <span class=\"n\">func<\/span><span class=\"k\">:<\/span> <span class=\"o\">(<\/span><span class=\"kt\">K<\/span><span class=\"o\">,<\/span> <span class=\"kt\">Iterator<\/span><span class=\"o\">[<\/span><span 
class=\"kt\">V<\/span><span class=\"o\">],<\/span> <span class=\"nc\">GroupState<\/span><span class=\"o\">[<\/span><span class=\"kt\">S<\/span><span class=\"o\">])<\/span> <span class=\"k\">=&gt;<\/span> <span class=\"nc\">Iterator<\/span><span class=\"o\">[<\/span><span class=\"kt\">U<\/span><span class=\"o\">])<\/span>\r\n<\/code><\/pre>\n<p>\u4ee5\u4e0b\u662f\u4e3aflatMapGroupsWithState\u64cd\u4f5c\u7b26\u6307\u5b9a\u521d\u59cb\u72b6\u6001\u7684\u4e00\u4e2a\u793a\u4f8b\u3002<\/p>\n<pre class=\"post-pre\"><code><span class=\"k\">val<\/span> <span class=\"nv\">fruitCountFunc<\/span> <span class=\"o\">=(<\/span><span class=\"n\">key<\/span><span class=\"k\">:<\/span> <span class=\"kt\">String<\/span><span class=\"o\">,<\/span> <span class=\"n\">values<\/span><span class=\"k\">:<\/span> <span class=\"kt\">Iterator<\/span><span class=\"o\">[<\/span><span class=\"kt\">String<\/span><span class=\"o\">],<\/span> <span class=\"n\">state<\/span><span class=\"k\">:<\/span> <span class=\"kt\">GroupState<\/span><span class=\"o\">[<\/span><span class=\"kt\">RunningCount<\/span><span class=\"o\">])<\/span> <span class=\"k\">=&gt;<\/span> <span class=\"o\">{<\/span>\r\n  <span class=\"k\">val<\/span> <span class=\"nv\">count<\/span> <span class=\"k\">=<\/span> <span class=\"nv\">state<\/span><span class=\"o\">.<\/span><span class=\"py\">getOption<\/span><span class=\"o\">.<\/span><span class=\"py\">map<\/span><span class=\"o\">(<\/span><span class=\"nv\">_<\/span><span class=\"o\">.<\/span><span class=\"py\">count<\/span><span class=\"o\">).<\/span><span class=\"py\">getOrElse<\/span><span class=\"o\">(<\/span><span class=\"mi\">0L<\/span><span class=\"o\">)<\/span> <span class=\"o\">+<\/span> <span class=\"nv\">valList<\/span><span class=\"o\">.<\/span><span class=\"py\">size<\/span>\r\n  <span class=\"nv\">state<\/span><span class=\"o\">.<\/span><span class=\"py\">update<\/span><span class=\"o\">(<\/span><span class=\"k\">new<\/span> <span 
class=\"nc\">RunningCount<\/span><span class=\"o\">(<\/span><span class=\"n\">count<\/span><span class=\"o\">))<\/span>\r\n  <span class=\"nc\">Iterator<\/span><span class=\"o\">((<\/span><span class=\"n\">key<\/span><span class=\"o\">,<\/span> <span class=\"nv\">count<\/span><span class=\"o\">.<\/span><span class=\"py\">toString<\/span><span class=\"o\">))<\/span>\r\n<span class=\"o\">}<\/span>\r\n\r\n<span class=\"k\">val<\/span> <span class=\"nv\">fruitCountInitialDS<\/span><span class=\"k\">:<\/span> <span class=\"kt\">Dataset<\/span><span class=\"o\">[(<\/span><span class=\"kt\">String<\/span>, <span class=\"kt\">RunningCount<\/span><span class=\"o\">)]<\/span> <span class=\"k\">=<\/span> <span class=\"nc\">Seq<\/span><span class=\"o\">(<\/span>\r\n  <span class=\"o\">(<\/span><span class=\"s\">\"apple\"<\/span><span class=\"o\">,<\/span> <span class=\"k\">new<\/span> <span class=\"nc\">RunningCount<\/span><span class=\"o\">(<\/span><span class=\"mi\">1<\/span><span class=\"o\">)),<\/span>\r\n  <span class=\"o\">(<\/span><span class=\"s\">\"orange\"<\/span><span class=\"o\">,<\/span> <span class=\"k\">new<\/span> <span class=\"nc\">RunningCount<\/span><span class=\"o\">(<\/span><span class=\"mi\">2<\/span><span class=\"o\">)),<\/span>\r\n  <span class=\"o\">(<\/span><span class=\"s\">\"mango\"<\/span><span class=\"o\">,<\/span> <span class=\"k\">new<\/span> <span class=\"nc\">RunningCount<\/span><span class=\"o\">(<\/span><span class=\"mi\">5<\/span><span class=\"o\">)),<\/span>\r\n<span class=\"o\">).<\/span><span class=\"py\">toDS<\/span><span class=\"o\">()<\/span>\r\n\r\n<span class=\"k\">val<\/span> <span class=\"nv\">fruitCountInitial<\/span> <span class=\"k\">=<\/span> <span class=\"nv\">initialState<\/span><span class=\"o\">.<\/span><span class=\"py\">groupByKey<\/span><span class=\"o\">(<\/span><span class=\"n\">x<\/span> <span class=\"k\">=&gt;<\/span> <span class=\"nv\">x<\/span><span class=\"o\">.<\/span><span class=\"py\">_1<\/span><span 
class=\"o\">).<\/span><span class=\"py\">mapValues<\/span><span class=\"o\">(<\/span><span class=\"nv\">_<\/span><span class=\"o\">.<\/span><span class=\"py\">_2<\/span><span class=\"o\">)<\/span>\r\n\r\n<span class=\"n\">fruitStream<\/span>\r\n  <span class=\"o\">.<\/span><span class=\"py\">groupByKey<\/span><span class=\"o\">(<\/span><span class=\"n\">x<\/span> <span class=\"k\">=&gt;<\/span> <span class=\"n\">x<\/span><span class=\"o\">)<\/span>\r\n  <span class=\"o\">.<\/span><span class=\"py\">flatMapGroupsWithState<\/span><span class=\"o\">(<\/span><span class=\"nc\">Update<\/span><span class=\"o\">,<\/span> <span class=\"nv\">GroupStateTimeout<\/span><span class=\"o\">.<\/span><span class=\"py\">NoTimeout<\/span><span class=\"o\">,<\/span> <span class=\"n\">fruitCountInitial<\/span><span class=\"o\">)(<\/span><span class=\"n\">fruitCountFunc<\/span><span class=\"o\">)<\/span>\r\n<\/code><\/pre>\n<p>\u4ee5\u4e0b\u662f\u4e00\u4e2a\u793a\u4f8b\uff0c\u6f14\u793a\u5982\u4f55\u5728mapGroupsWithState\u64cd\u4f5c\u7b26\u4e2d\u6307\u5b9a\u521d\u59cb\u72b6\u6001\u3002<\/p>\n<pre class=\"post-pre\"><code><span class=\"k\">val<\/span> <span class=\"nv\">fruitCountFunc<\/span> <span class=\"o\">=(<\/span><span class=\"n\">key<\/span><span class=\"k\">:<\/span> <span class=\"kt\">String<\/span><span class=\"o\">,<\/span> <span class=\"n\">values<\/span><span class=\"k\">:<\/span> <span class=\"kt\">Iterator<\/span><span class=\"o\">[<\/span><span class=\"kt\">String<\/span><span class=\"o\">],<\/span> <span class=\"n\">state<\/span><span class=\"k\">:<\/span> <span class=\"kt\">GroupState<\/span><span class=\"o\">[<\/span><span class=\"kt\">RunningCount<\/span><span class=\"o\">])<\/span> <span class=\"k\">=&gt;<\/span> <span class=\"o\">{<\/span>\r\n  <span class=\"k\">val<\/span> <span class=\"nv\">count<\/span> <span class=\"k\">=<\/span> <span class=\"nv\">state<\/span><span class=\"o\">.<\/span><span class=\"py\">getOption<\/span><span class=\"o\">.<\/span><span 
class=\"py\">map<\/span><span class=\"o\">(<\/span><span class=\"nv\">_<\/span><span class=\"o\">.<\/span><span class=\"py\">count<\/span><span class=\"o\">).<\/span><span class=\"py\">getOrElse<\/span><span class=\"o\">(<\/span><span class=\"mi\">0L<\/span><span class=\"o\">)<\/span> <span class=\"o\">+<\/span> <span class=\"nv\">valList<\/span><span class=\"o\">.<\/span><span class=\"py\">size<\/span>\r\n  <span class=\"nv\">state<\/span><span class=\"o\">.<\/span><span class=\"py\">update<\/span><span class=\"o\">(<\/span><span class=\"k\">new<\/span> <span class=\"nc\">RunningCount<\/span><span class=\"o\">(<\/span><span class=\"n\">count<\/span><span class=\"o\">))<\/span>\r\n  <span class=\"o\">(<\/span><span class=\"n\">key<\/span><span class=\"o\">,<\/span> <span class=\"nv\">count<\/span><span class=\"o\">.<\/span><span class=\"py\">toString<\/span><span class=\"o\">)<\/span>\r\n<span class=\"o\">}<\/span>\r\n\r\n<span class=\"k\">val<\/span> <span class=\"nv\">fruitCountInitialDS<\/span><span class=\"k\">:<\/span> <span class=\"kt\">Dataset<\/span><span class=\"o\">[(<\/span><span class=\"kt\">String<\/span>, <span class=\"kt\">RunningCount<\/span><span class=\"o\">)]<\/span> <span class=\"k\">=<\/span> <span class=\"nc\">Seq<\/span><span class=\"o\">(<\/span>\r\n  <span class=\"o\">(<\/span><span class=\"s\">\"apple\"<\/span><span class=\"o\">,<\/span> <span class=\"k\">new<\/span> <span class=\"nc\">RunningCount<\/span><span class=\"o\">(<\/span><span class=\"mi\">1<\/span><span class=\"o\">)),<\/span>\r\n  <span class=\"o\">(<\/span><span class=\"s\">\"orange\"<\/span><span class=\"o\">,<\/span> <span class=\"k\">new<\/span> <span class=\"nc\">RunningCount<\/span><span class=\"o\">(<\/span><span class=\"mi\">2<\/span><span class=\"o\">)),<\/span>\r\n  <span class=\"o\">(<\/span><span class=\"s\">\"mango\"<\/span><span class=\"o\">,<\/span> <span class=\"k\">new<\/span> <span class=\"nc\">RunningCount<\/span><span class=\"o\">(<\/span><span 
class=\"mi\">5<\/span><span class=\"o\">)),<\/span>\r\n<span class=\"o\">).<\/span><span class=\"py\">toDS<\/span><span class=\"o\">()<\/span>\r\n\r\n<span class=\"k\">val<\/span> <span class=\"nv\">fruitCountInitial<\/span> <span class=\"k\">=<\/span> <span class=\"nv\">initialState<\/span><span class=\"o\">.<\/span><span class=\"py\">groupByKey<\/span><span class=\"o\">(<\/span><span class=\"n\">x<\/span> <span class=\"k\">=&gt;<\/span> <span class=\"nv\">x<\/span><span class=\"o\">.<\/span><span class=\"py\">_1<\/span><span class=\"o\">).<\/span><span class=\"py\">mapValues<\/span><span class=\"o\">(<\/span><span class=\"nv\">_<\/span><span class=\"o\">.<\/span><span class=\"py\">_2<\/span><span class=\"o\">)<\/span>\r\n\r\n<span class=\"n\">fruitStream<\/span>\r\n  <span class=\"o\">.<\/span><span class=\"py\">groupByKey<\/span><span class=\"o\">(<\/span><span class=\"n\">x<\/span> <span class=\"k\">=&gt;<\/span> <span class=\"n\">x<\/span><span class=\"o\">)<\/span>\r\n  <span class=\"o\">.<\/span><span class=\"py\">mapGroupsWithState<\/span><span class=\"o\">(<\/span><span class=\"nv\">GroupStateTimeout<\/span><span class=\"o\">.<\/span><span class=\"py\">NoTimeout<\/span><span class=\"o\">,<\/span> <span class=\"n\">fruitCountInitial<\/span><span class=\"o\">)(<\/span><span class=\"n\">fruitCountFunc<\/span><span class=\"o\">)<\/span>\r\n<\/code><\/pre>\n<h2>\u6d4b\u8bd5\u72b6\u6001\u66f4\u65b0\u51fd\u6570<\/h2>\n<p>\u901a\u8fc7\u4f7f\u7528TestGroupState API\uff0c\u53ef\u4ee5\u6d4b\u8bd5Dataset.groupByKey\uff08&#8230;\uff09.mapGroupsWithState\uff08&#8230;\uff09\u548cDataset.groupByKey\uff08&#8230;\uff09.flatMapGroupsWithState\uff08&#8230;\uff09\u7684\u72b6\u6001\u66f4\u65b0\u51fd\u6570\u3002<\/p>\n<p>\u72b6\u6001\u66f4\u65b0\u51fd\u6570\u63a5\u6536\u5148\u524d\u72b6\u6001\u4e3a\u8f93\u5165\uff0c\u8be5\u8f93\u5165\u662f GroupState\u5bf9\u8c61\u7c7b\u578b\u3002\u6709\u5173\u793a\u4f8b\uff0c\u8bf7\u53c2\u8003Apache 
Spark\u7684GroupState\u53c2\u8003\u6587\u6863\u3002<\/p>\n<pre class=\"post-pre\"><code><span class=\"k\">import<\/span> <span class=\"nn\">org.apache.spark.sql.streaming._<\/span>\r\n<span class=\"k\">import<\/span> <span class=\"nn\">org.apache.spark.api.java.Optional<\/span>\r\n\r\n<span class=\"nf\">test<\/span><span class=\"o\">(<\/span><span class=\"s\">\"flatMapGroupsWithState's state update function\"<\/span><span class=\"o\">)<\/span> <span class=\"o\">{<\/span>\r\n  <span class=\"k\">var<\/span> <span class=\"n\">prevState<\/span> <span class=\"k\">=<\/span> <span class=\"nv\">TestGroupState<\/span><span class=\"o\">.<\/span><span class=\"py\">create<\/span><span class=\"o\">[<\/span><span class=\"kt\">UserStatus<\/span><span class=\"o\">](<\/span>\r\n    <span class=\"n\">optionalState<\/span> <span class=\"k\">=<\/span> <span class=\"nv\">Optional<\/span><span class=\"o\">.<\/span><span class=\"py\">empty<\/span><span class=\"o\">[<\/span><span class=\"kt\">UserStatus<\/span><span class=\"o\">],<\/span>\r\n    <span class=\"n\">timeoutConf<\/span> <span class=\"k\">=<\/span> <span class=\"nv\">GroupStateTimeout<\/span><span class=\"o\">.<\/span><span class=\"py\">EventTimeTimeout<\/span><span class=\"o\">,<\/span>\r\n    <span class=\"n\">batchProcessingTimeMs<\/span> <span class=\"k\">=<\/span> <span class=\"mi\">1L<\/span><span class=\"o\">,<\/span>\r\n    <span class=\"n\">eventTimeWatermarkMs<\/span> <span class=\"k\">=<\/span> <span class=\"nv\">Optional<\/span><span class=\"o\">.<\/span><span class=\"py\">of<\/span><span class=\"o\">(<\/span><span class=\"mi\">1L<\/span><span class=\"o\">),<\/span>\r\n    <span class=\"n\">hasTimedOut<\/span> <span class=\"k\">=<\/span> <span class=\"kc\">false<\/span><span class=\"o\">)<\/span>\r\n\r\n  <span class=\"k\">val<\/span> <span class=\"nv\">userId<\/span><span class=\"k\">:<\/span> <span class=\"kt\">String<\/span> <span class=\"o\">=<\/span> <span class=\"o\">...<\/span>\r\n  <span 
class=\"k\">val<\/span> <span class=\"nv\">actions<\/span><span class=\"k\">:<\/span> <span class=\"kt\">Iterator<\/span><span class=\"o\">[<\/span><span class=\"kt\">UserAction<\/span><span class=\"o\">]<\/span> <span class=\"k\">=<\/span> <span class=\"o\">...<\/span>\r\n\r\n  <span class=\"nf\">assert<\/span><span class=\"o\">(!<\/span><span class=\"nv\">prevState<\/span><span class=\"o\">.<\/span><span class=\"py\">hasUpdated<\/span><span class=\"o\">)<\/span>\r\n\r\n  <span class=\"nf\">updateState<\/span><span class=\"o\">(<\/span><span class=\"n\">userId<\/span><span class=\"o\">,<\/span> <span class=\"n\">actions<\/span><span class=\"o\">,<\/span> <span class=\"n\">prevState<\/span><span class=\"o\">)<\/span>\r\n\r\n  <span class=\"nf\">assert<\/span><span class=\"o\">(<\/span><span class=\"nv\">prevState<\/span><span class=\"o\">.<\/span><span class=\"py\">hasUpdated<\/span><span class=\"o\">)<\/span>\r\n<span class=\"o\">}<\/span>\r\n<\/code><\/pre>\n<h3>Databricks \u514d\u8d39\u8bd5\u7528<\/h3>\n<p>Databricks \u514d\u8d39\u8bd5\u7528<\/p>\n","protected":false},"excerpt":{"rendered":"<p>\u5728\u751f\u4ea7\u73af\u5883\u4e2d\u4f7f\u7528\u7684\u7ed3\u6784\u5316\u6d41\u5f0f\u5904\u7406 | Databricks \u5728 AWS \u4e0a [\u622a\u81f3 2022\/3\/21] \u9019\u672c [&hellip;]<\/p>\n","protected":false},"author":7,"featured_media":0,"comment_status":"closed","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[1],"tags":[],"class_list":["post-47148","post","type-post","status-publish","format-standard","hentry","category-uncategorized"],"yoast_head":"<!-- This site is optimized with the Yoast SEO Premium plugin v21.5 (Yoast SEO v21.5) - https:\/\/yoast.com\/wordpress\/plugins\/seo\/ -->\n<title>\u5728Databricks\u4e2d\u5168\u9762\u8fd0\u7528\u7ed3\u6784\u5316\u6d41\u5904\u7406\u6280\u672f - Blog - Silicon Cloud<\/title>\n<meta name=\"robots\" content=\"index, follow, max-snippet:-1, max-image-preview:large, 
max-video-preview:-1\" \/>\n<link rel=\"canonical\" href=\"https:\/\/www.silicloud.com\/zh\/blog\/\u5728databricks\u4e2d\u5168\u9762\u8fd0\u7528\u7ed3\u6784\u5316\u6d41\u5904\u7406\u6280\u672f\/\" \/>\n<meta property=\"og:locale\" content=\"zh_CN\" \/>\n<meta property=\"og:type\" content=\"article\" \/>\n<meta property=\"og:title\" content=\"\u5728Databricks\u4e2d\u5168\u9762\u8fd0\u7528\u7ed3\u6784\u5316\u6d41\u5904\u7406\u6280\u672f\" \/>\n<meta property=\"og:description\" content=\"\u5728\u751f\u4ea7\u73af\u5883\u4e2d\u4f7f\u7528\u7684\u7ed3\u6784\u5316\u6d41\u5f0f\u5904\u7406 | Databricks \u5728 AWS \u4e0a [\u622a\u81f3 2022\/3\/21] \u9019\u672c [&hellip;]\" \/>\n<meta property=\"og:url\" content=\"https:\/\/www.silicloud.com\/zh\/blog\/\u5728databricks\u4e2d\u5168\u9762\u8fd0\u7528\u7ed3\u6784\u5316\u6d41\u5904\u7406\u6280\u672f\/\" \/>\n<meta property=\"og:site_name\" content=\"Blog - Silicon Cloud\" \/>\n<meta property=\"article:published_time\" content=\"2024-01-15T16:37:15+00:00\" \/>\n<meta property=\"article:modified_time\" content=\"2024-04-29T14:56:14+00:00\" \/>\n<meta property=\"og:image\" content=\"https:\/\/cdn.silicloud.com\/blog-img\/blog\/img\/657d7ca7913a08637a69d0f4\/20-0.png\" \/>\n<meta name=\"author\" content=\"\u79d1, \u9896\" \/>\n<meta name=\"twitter:card\" content=\"summary_large_image\" \/>\n<meta name=\"twitter:label1\" content=\"\u4f5c\u8005\" \/>\n\t<meta name=\"twitter:data1\" content=\"\u79d1, \u9896\" \/>\n\t<meta name=\"twitter:label2\" content=\"\u9884\u8ba1\u9605\u8bfb\u65f6\u95f4\" \/>\n\t<meta name=\"twitter:data2\" content=\"5 \u5206\" \/>\n<script type=\"application\/ld+json\" 
class=\"yoast-schema-graph\">{\"@context\":\"https:\/\/schema.org\",\"@graph\":[{\"@type\":\"WebPage\",\"@id\":\"https:\/\/www.silicloud.com\/zh\/blog\/%e5%9c%a8databricks%e4%b8%ad%e5%85%a8%e9%9d%a2%e8%bf%90%e7%94%a8%e7%bb%93%e6%9e%84%e5%8c%96%e6%b5%81%e5%a4%84%e7%90%86%e6%8a%80%e6%9c%af\/\",\"url\":\"https:\/\/www.silicloud.com\/zh\/blog\/%e5%9c%a8databricks%e4%b8%ad%e5%85%a8%e9%9d%a2%e8%bf%90%e7%94%a8%e7%bb%93%e6%9e%84%e5%8c%96%e6%b5%81%e5%a4%84%e7%90%86%e6%8a%80%e6%9c%af\/\",\"name\":\"\u5728Databricks\u4e2d\u5168\u9762\u8fd0\u7528\u7ed3\u6784\u5316\u6d41\u5904\u7406\u6280\u672f - Blog - Silicon Cloud\",\"isPartOf\":{\"@id\":\"https:\/\/www.silicloud.com\/zh\/blog\/#website\"},\"datePublished\":\"2024-01-15T16:37:15+00:00\",\"dateModified\":\"2024-04-29T14:56:14+00:00\",\"author\":{\"@id\":\"https:\/\/www.silicloud.com\/zh\/blog\/#\/schema\/person\/8ca01ba7f7362ad4edb7da206a12f29e\"},\"breadcrumb\":{\"@id\":\"https:\/\/www.silicloud.com\/zh\/blog\/%e5%9c%a8databricks%e4%b8%ad%e5%85%a8%e9%9d%a2%e8%bf%90%e7%94%a8%e7%bb%93%e6%9e%84%e5%8c%96%e6%b5%81%e5%a4%84%e7%90%86%e6%8a%80%e6%9c%af\/#breadcrumb\"},\"inLanguage\":\"zh-Hans\",\"potentialAction\":[{\"@type\":\"ReadAction\",\"target\":[\"https:\/\/www.silicloud.com\/zh\/blog\/%e5%9c%a8databricks%e4%b8%ad%e5%85%a8%e9%9d%a2%e8%bf%90%e7%94%a8%e7%bb%93%e6%9e%84%e5%8c%96%e6%b5%81%e5%a4%84%e7%90%86%e6%8a%80%e6%9c%af\/\"]}]},{\"@type\":\"BreadcrumbList\",\"@id\":\"https:\/\/www.silicloud.com\/zh\/blog\/%e5%9c%a8databricks%e4%b8%ad%e5%85%a8%e9%9d%a2%e8%bf%90%e7%94%a8%e7%bb%93%e6%9e%84%e5%8c%96%e6%b5%81%e5%a4%84%e7%90%86%e6%8a%80%e6%9c%af\/#breadcrumb\",\"itemListElement\":[{\"@type\":\"ListItem\",\"position\":1,\"name\":\"\u9996\u9875\",\"item\":\"https:\/\/www.silicloud.com\/zh\/blog\/\"},{\"@type\":\"ListItem\",\"position\":2,\"name\":\"\u5728Databricks\u4e2d\u5168\u9762\u8fd0\u7528\u7ed3\u6784\u5316\u6d41\u5904\u7406\u6280\u672f\"}]},{\"@type\":\"WebSite\",\"@id\":\"https:\/\/www.silicloud.com\/zh\/blog\/#website\",\"url\
":\"https:\/\/www.silicloud.com\/zh\/blog\/\",\"name\":\"Blog - Silicon Cloud\",\"description\":\"\",\"inLanguage\":\"zh-Hans\"},{\"@type\":\"Person\",\"@id\":\"https:\/\/www.silicloud.com\/zh\/blog\/#\/schema\/person\/8ca01ba7f7362ad4edb7da206a12f29e\",\"name\":\"\u79d1, \u9896\",\"image\":{\"@type\":\"ImageObject\",\"inLanguage\":\"zh-Hans\",\"@id\":\"https:\/\/www.silicloud.com\/zh\/blog\/#\/schema\/person\/image\/\",\"url\":\"https:\/\/secure.gravatar.com\/avatar\/8a6fb3cc7ba2f69d2189ba532aec4633ea7ed75ac0af162ec367cb3abc0fb2af?s=96&d=mm&r=g\",\"contentUrl\":\"https:\/\/secure.gravatar.com\/avatar\/8a6fb3cc7ba2f69d2189ba532aec4633ea7ed75ac0af162ec367cb3abc0fb2af?s=96&d=mm&r=g\",\"caption\":\"\u79d1, \u9896\"},\"url\":\"https:\/\/www.silicloud.com\/zh\/blog\/author\/keying\/\"},{\"@type\":\"ImageObject\",\"inLanguage\":\"zh-Hans\",\"@id\":\"https:\/\/www.silicloud.com\/zh\/blog\/%e5%9c%a8databricks%e4%b8%ad%e5%85%a8%e9%9d%a2%e8%bf%90%e7%94%a8%e7%bb%93%e6%9e%84%e5%8c%96%e6%b5%81%e5%a4%84%e7%90%86%e6%8a%80%e6%9c%af\/#local-main-organization-logo\",\"url\":\"\",\"contentUrl\":\"\",\"caption\":\"Blog - Silicon Cloud\"}]}<\/script>\n<!-- \/ Yoast SEO Premium plugin. 
-->","yoast_head_json":{"title":"\u5728Databricks\u4e2d\u5168\u9762\u8fd0\u7528\u7ed3\u6784\u5316\u6d41\u5904\u7406\u6280\u672f - Blog - Silicon Cloud","robots":{"index":"index","follow":"follow","max-snippet":"max-snippet:-1","max-image-preview":"max-image-preview:large","max-video-preview":"max-video-preview:-1"},"canonical":"https:\/\/www.silicloud.com\/zh\/blog\/\u5728databricks\u4e2d\u5168\u9762\u8fd0\u7528\u7ed3\u6784\u5316\u6d41\u5904\u7406\u6280\u672f\/","og_locale":"zh_CN","og_type":"article","og_title":"\u5728Databricks\u4e2d\u5168\u9762\u8fd0\u7528\u7ed3\u6784\u5316\u6d41\u5904\u7406\u6280\u672f","og_description":"\u5728\u751f\u4ea7\u73af\u5883\u4e2d\u4f7f\u7528\u7684\u7ed3\u6784\u5316\u6d41\u5f0f\u5904\u7406 | Databricks \u5728 AWS \u4e0a [\u622a\u81f3 2022\/3\/21] \u9019\u672c [&hellip;]","og_url":"https:\/\/www.silicloud.com\/zh\/blog\/\u5728databricks\u4e2d\u5168\u9762\u8fd0\u7528\u7ed3\u6784\u5316\u6d41\u5904\u7406\u6280\u672f\/","og_site_name":"Blog - Silicon Cloud","article_published_time":"2024-01-15T16:37:15+00:00","article_modified_time":"2024-04-29T14:56:14+00:00","og_image":[{"url":"https:\/\/cdn.silicloud.com\/blog-img\/blog\/img\/657d7ca7913a08637a69d0f4\/20-0.png"}],"author":"\u79d1, \u9896","twitter_card":"summary_large_image","twitter_misc":{"\u4f5c\u8005":"\u79d1, \u9896","\u9884\u8ba1\u9605\u8bfb\u65f6\u95f4":"5 \u5206"},"schema":{"@context":"https:\/\/schema.org","@graph":[{"@type":"WebPage","@id":"https:\/\/www.silicloud.com\/zh\/blog\/%e5%9c%a8databricks%e4%b8%ad%e5%85%a8%e9%9d%a2%e8%bf%90%e7%94%a8%e7%bb%93%e6%9e%84%e5%8c%96%e6%b5%81%e5%a4%84%e7%90%86%e6%8a%80%e6%9c%af\/","url":"https:\/\/www.silicloud.com\/zh\/blog\/%e5%9c%a8databricks%e4%b8%ad%e5%85%a8%e9%9d%a2%e8%bf%90%e7%94%a8%e7%bb%93%e6%9e%84%e5%8c%96%e6%b5%81%e5%a4%84%e7%90%86%e6%8a%80%e6%9c%af\/","name":"\u5728Databricks\u4e2d\u5168\u9762\u8fd0\u7528\u7ed3\u6784\u5316\u6d41\u5904\u7406\u6280\u672f - Blog - Silicon 
Cloud","isPartOf":{"@id":"https:\/\/www.silicloud.com\/zh\/blog\/#website"},"datePublished":"2024-01-15T16:37:15+00:00","dateModified":"2024-04-29T14:56:14+00:00","author":{"@id":"https:\/\/www.silicloud.com\/zh\/blog\/#\/schema\/person\/8ca01ba7f7362ad4edb7da206a12f29e"},"breadcrumb":{"@id":"https:\/\/www.silicloud.com\/zh\/blog\/%e5%9c%a8databricks%e4%b8%ad%e5%85%a8%e9%9d%a2%e8%bf%90%e7%94%a8%e7%bb%93%e6%9e%84%e5%8c%96%e6%b5%81%e5%a4%84%e7%90%86%e6%8a%80%e6%9c%af\/#breadcrumb"},"inLanguage":"zh-Hans","potentialAction":[{"@type":"ReadAction","target":["https:\/\/www.silicloud.com\/zh\/blog\/%e5%9c%a8databricks%e4%b8%ad%e5%85%a8%e9%9d%a2%e8%bf%90%e7%94%a8%e7%bb%93%e6%9e%84%e5%8c%96%e6%b5%81%e5%a4%84%e7%90%86%e6%8a%80%e6%9c%af\/"]}]},{"@type":"BreadcrumbList","@id":"https:\/\/www.silicloud.com\/zh\/blog\/%e5%9c%a8databricks%e4%b8%ad%e5%85%a8%e9%9d%a2%e8%bf%90%e7%94%a8%e7%bb%93%e6%9e%84%e5%8c%96%e6%b5%81%e5%a4%84%e7%90%86%e6%8a%80%e6%9c%af\/#breadcrumb","itemListElement":[{"@type":"ListItem","position":1,"name":"\u9996\u9875","item":"https:\/\/www.silicloud.com\/zh\/blog\/"},{"@type":"ListItem","position":2,"name":"\u5728Databricks\u4e2d\u5168\u9762\u8fd0\u7528\u7ed3\u6784\u5316\u6d41\u5904\u7406\u6280\u672f"}]},{"@type":"WebSite","@id":"https:\/\/www.silicloud.com\/zh\/blog\/#website","url":"https:\/\/www.silicloud.com\/zh\/blog\/","name":"Blog - Silicon Cloud","description":"","inLanguage":"zh-Hans"},{"@type":"Person","@id":"https:\/\/www.silicloud.com\/zh\/blog\/#\/schema\/person\/8ca01ba7f7362ad4edb7da206a12f29e","name":"\u79d1, \u9896","image":{"@type":"ImageObject","inLanguage":"zh-Hans","@id":"https:\/\/www.silicloud.com\/zh\/blog\/#\/schema\/person\/image\/","url":"https:\/\/secure.gravatar.com\/avatar\/8a6fb3cc7ba2f69d2189ba532aec4633ea7ed75ac0af162ec367cb3abc0fb2af?s=96&d=mm&r=g","contentUrl":"https:\/\/secure.gravatar.com\/avatar\/8a6fb3cc7ba2f69d2189ba532aec4633ea7ed75ac0af162ec367cb3abc0fb2af?s=96&d=mm&r=g","caption":"\u79d1, 
\u9896"},"url":"https:\/\/www.silicloud.com\/zh\/blog\/author\/keying\/"},{"@type":"ImageObject","inLanguage":"zh-Hans","@id":"https:\/\/www.silicloud.com\/zh\/blog\/%e5%9c%a8databricks%e4%b8%ad%e5%85%a8%e9%9d%a2%e8%bf%90%e7%94%a8%e7%bb%93%e6%9e%84%e5%8c%96%e6%b5%81%e5%a4%84%e7%90%86%e6%8a%80%e6%9c%af\/#local-main-organization-logo","url":"","contentUrl":"","caption":"Blog - Silicon Cloud"}]}},"_links":{"self":[{"href":"https:\/\/www.silicloud.com\/zh\/blog\/wp-json\/wp\/v2\/posts\/47148","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/www.silicloud.com\/zh\/blog\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/www.silicloud.com\/zh\/blog\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/www.silicloud.com\/zh\/blog\/wp-json\/wp\/v2\/users\/7"}],"replies":[{"embeddable":true,"href":"https:\/\/www.silicloud.com\/zh\/blog\/wp-json\/wp\/v2\/comments?post=47148"}],"version-history":[{"count":2,"href":"https:\/\/www.silicloud.com\/zh\/blog\/wp-json\/wp\/v2\/posts\/47148\/revisions"}],"predecessor-version":[{"id":88147,"href":"https:\/\/www.silicloud.com\/zh\/blog\/wp-json\/wp\/v2\/posts\/47148\/revisions\/88147"}],"wp:attachment":[{"href":"https:\/\/www.silicloud.com\/zh\/blog\/wp-json\/wp\/v2\/media?parent=47148"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/www.silicloud.com\/zh\/blog\/wp-json\/wp\/v2\/categories?post=47148"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/www.silicloud.com\/zh\/blog\/wp-json\/wp\/v2\/tags?post=47148"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}