{"id":50537,"date":"2023-06-18T09:51:44","date_gmt":"2022-11-22T14:15:02","guid":{"rendered":"https:\/\/www.silicloud.com\/zh\/blog\/%e4%bb%8b%e7%bb%8d%e7%94%a8%e4%ba%8e%e5%a4%a7%e8%a7%84%e6%a8%a1%e6%95%b0%e6%8d%ae%e7%a7%91%e5%ad%a6%e7%9a%84apache-spark%e6%95%b0%e6%8d%ae%e6%a1%86%e6%9e%b6%e3%80%82\/"},"modified":"2024-04-30T09:27:02","modified_gmt":"2024-04-30T01:27:02","slug":"%e4%bb%8b%e7%bb%8d%e7%94%a8%e4%ba%8e%e5%a4%a7%e8%a7%84%e6%a8%a1%e6%95%b0%e6%8d%ae%e7%a7%91%e5%ad%a6%e7%9a%84apache-spark%e6%95%b0%e6%8d%ae%e6%a1%86%e6%9e%b6%e3%80%82","status":"publish","type":"post","link":"https:\/\/www.silicloud.com\/zh\/blog\/%e4%bb%8b%e7%bb%8d%e7%94%a8%e4%ba%8e%e5%a4%a7%e8%a7%84%e6%a8%a1%e6%95%b0%e6%8d%ae%e7%a7%91%e5%ad%a6%e7%9a%84apache-spark%e6%95%b0%e6%8d%ae%e6%a1%86%e6%9e%b6%e3%80%82\/","title":{"rendered":"\u4ecb\u7ecd\u7528\u4e8e\u5927\u89c4\u6a21\u6570\u636e\u79d1\u5b66\u7684Apache Spark\u6570\u636e\u6846\u67b6"},"content":{"rendered":"<p>\u4ee5\u4e0b\u662f\u300aApache Spark\u6570\u636e\u6846\u67b6\u7528\u4e8e\u5927\u89c4\u6a21\u6570\u636e\u79d1\u5b66\u300b\u7684\u7ffb\u8bd1\u3002<\/p>\n<div>\u7531\u4e8e\u8fd9\u662f2015\u5e742\u6708\u7684\u6587\u7ae0\uff0c\u6240\u4ee5\u53ef\u80fd\u5b58\u5728\u90e8\u5206\u5185\u5bb9\u5df2\u7ecf\u8fc7\u65f6\u7684\u53ef\u80fd\uff0c\u4f46\u57fa\u672c\u7684\u601d\u8def\u6ca1\u6709\u6539\u53d8\u3002<\/div>\n<p>\u6211\u5f88\u9ad8\u5174\u80fd\u591f\u5ba3\u5e03\u65b0\u7684\u6570\u636e\u6846API\u7684\u53d1\u5e03\uff0c\u8fd9\u4e2aAPI\u662f\u4e3a\u4e86\u8ba9\u66f4\u591a\u4eba\u80fd\u591f\u8f7b\u677e\u8fdb\u884c\u5927\u6570\u636e\u5206\u6790\u800c\u8bbe\u8ba1\u7684\u3002<\/p>\n<p>\u6700\u521d\uff0cApache Spark\u7684\u5f00\u6e90\u76ee\u6807\u662f\u4e3a\u901a\u7528\u7f16\u7a0b\u8bed\u8a00\uff08Java\u3001Python\u3001Scala\uff09\u63d0\u4f9b\u7b80\u5355\u7684API\uff0c\u4ee5\u5b9e\u73b0\u5206\u5e03\u5f0f\u6570\u636e\u5904\u7406\u3002\u501f\u52a9Spark\uff0c\u6211\u4eec\u53ef\u4ee5\u901a\u8fc7\u5bf9\u5206\u5e03\u5f0f\u6570\u636e\u96c6\u5408\uff08RDD\uff09\u8fdb\u884c\u529f\u80fd\u8f6c\u6362\u6765\u5b9e\u73b0\u5206\u5e03\u5f0f\u5904\u7406\u3002\u8fd9\u662f\u4e00\u4e2a\u975e\u5e38\u5f3a\u5927\u7684API\uff0c\u5b83\u53ef\u4ee5\u7528\u5341\u51e0\u884c\u4ee3\u7801\u6765\u63cf\u8ff0\u9700\u8981\u51e0\u5343\u884c\u4ee3\u7801\u6765\u8868\u8fbe\u7684\u4efb\u52a1\u3002<\/p>\n<p>Spark\u7684\u589e\u957f\u975e\u5e38\u8fc5\u901f\uff0c\u6211\u4eec\u5e0c\u671b\u4e0d\u4ec5\u4ec5\u9650\u4e8e&#8221;\u5927\u6570\u636e&#8221;\u5de5\u7a0b\u5e08\uff0c\u8fd8\u53ef\u4ee5\u8ba9\u66f4\u591a\u5e7f\u6cdb\u7684\u7528\u6237\u80fd\u591f\u5229\u7528\u5206\u5e03\u5f0f\u5904\u7406\u7684\u529f\u80fd\u3002\u4e3a\u4e86\u5b9e\u73b0\u8fd9\u4e00\u76ee\u6807\uff0c\u6211\u4eec\u5f00\u53d1\u4e86\u5168\u65b0\u7684\u6570\u636e\u6846\u67b6API\u3002\u8fd9\u4e2aAPI\u53d7\u5230R\u548cPython\uff08Pandas\uff09\u7684\u6570\u636e\u6846\u67b6\u7684\u542f\u53d1\uff0c\u4f46\u662f\u7ecf\u8fc7\u4e86\u4ece\u5934\u8bbe\u8ba1\uff0c\u4ee5\u652f\u6301\u73b0\u4ee3\u7684\u5927\u6570\u636e\u548c\u6570\u636e\u79d1\u5b66\u5e94\u7528\u3002\u4f5c\u4e3a\u73b0\u6709RDD API\u7684\u6269\u5c55\uff0c\u6570\u636e\u6846\u67b6\u63d0\u4f9b\u4ee5\u4e0b\u529f\u80fd\u3002<\/p>\n<ul class=\"post-ul\">\n<li style=\"list-style-type: none;\">\n<ul class=\"post-ul\">\u5358\u4f53\u306e\u30e9\u30c3\u30d7\u30c8\u30c3\u30d7\u306b\u304a\u3051\u308b\u30ad\u30ed\u30d0\u30a4\u30c8\u306e\u30c7\u30fc\u30bf\u304b\u3089\u3001\u5927\u898f\u6a21\u30af\u30e9\u30b9\u30bf\u30fc\u3067\u30da\u30bf\u30d0\u30a4\u30c8\u306e\u30c7\u30fc\u30bf\u306b\u30b9\u30b1\u30fc\u30eb\u3002<\/ul>\n<\/li>\n<\/ul>\n<p>&nbsp;<\/p>\n<ul class=\"post-ul\">\n<li style=\"list-style-type: none;\">\n<ul class=\"post-ul\">\u5e45\u5e83\u3044\u30c7\u30fc\u30bf\u30d5\u30a9\u30fc\u30de\u30c3\u30c8\u3001\u30b9\u30c8\u30ec\u30fc\u30b8\u30b7\u30b9\u30c6\u30e0\u3092\u30b5\u30dd\u30fc\u30c8\u3002<\/ul>\n<\/li>\n<\/ul>\n<p>&nbsp;<\/p>\n<ul class=\"post-ul\">\n<li style=\"list-style-type: none;\">\n<ul class=\"post-ul\">Spark SQL Catalyst\u30aa\u30d7\u30c6\u30a3\u30de\u30a4\u30b6\u30fc\u306b\u3088\u308b\u6700\u65b0\u306e\u6700\u9069\u5316\u3001\u30b3\u30fc\u30c9\u751f\u6210\u3002<\/ul>\n<\/li>\n<\/ul>\n<p>&nbsp;<\/p>\n<ul class=\"post-ul\">\n<li style=\"list-style-type: none;\">\n<ul class=\"post-ul\">Spark\u3092\u4ecb\u3057\u305f\u30d3\u30c3\u30b0\u30c7\u30fc\u30bf\u30c4\u30fc\u30eb\u3068\u30a4\u30f3\u30d5\u30e9\u30b9\u30c8\u30e9\u30af\u30c1\u30e3\u306e\u30b7\u30fc\u30e0\u30ec\u30b9\u306a\u7d71\u5408\u3002<\/ul>\n<\/li>\n<\/ul>\n<p>&nbsp;<\/p>\n<ul class=\"post-ul\">Python\u3001Java\u3001Scala\u3001R\u5411\u3051API\u306e\u63d0\u4f9b\u3002<\/ul>\n<p>\u5bf9\u4e8e\u719f\u6089\u5176\u4ed6\u8bed\u8a00\u7684\u6570\u636e\u6846\u67b6\u7684\u7528\u6237\u6765\u8bf4\uff0c\u8fd9\u4e2aAPI\u80af\u5b9a\u8ba9\u4ed6\u4eec\u611f\u89c9\u50cf\u5728\u81ea\u5df1\u7684\u5bb6\u4e2d\u4e00\u6837\u3002\u800c\u5bf9\u4e8e\u73b0\u6709\u7684Spark\u7528\u6237\u6765\u8bf4\uff0c\u4ed6\u4eec\u53ef\u4ee5\u901a\u8fc7\u4f7f\u7528\u8fd9\u4e2a\u6269\u5c55API\u6765\u66f4\u5bb9\u6613\u5730\u8fdb\u884c\u7f16\u7a0b\uff0c\u5e76\u901a\u8fc7\u667a\u80fd\u4f18\u5316\u548c\u4ee3\u7801\u751f\u6210\u6765\u63d0\u9ad8\u6027\u80fd\u3002<\/p>\n<h1>\u6570\u636e\u6846\u662f\u4ec0\u4e48\uff1f<\/h1>\n<p>\u5728Spark\u4e2d\uff0c\u6570\u636e\u5e27\u662f\u4e00\u79cd\u5206\u5e03\u5f0f\u96c6\u5408\uff0c\u53ef\u4ee5\u901a\u8fc7\u547d\u540d\u5217\u5bf9\u6570\u636e\u8fdb\u884c\u7ec4\u7ec7\u3002\u4ece\u6982\u5ff5\u4e0a\u8bb2\uff0c\u6570\u636e\u5e27\u7c7b\u4f3c\u4e8e\u5173\u7cfb\u578b\u6570\u636e\u5e93\u4e2d\u7684\u8868\u683c\u6216\u8005R\/Python\u4e2d\u7684\u6570\u636e\u5e27\uff0c\u4f46\u5728\u5185\u90e8\u8fdb\u884c\u4e86\u5404\u79cd\u4f18\u5316\u3002\u6570\u636e\u5e27\u53ef\u4ee5\u901a\u8fc7\u7ed3\u6784\u5316\u6570\u636e\u6587\u4ef6\u3001Hive\u8868\u3001\u5916\u90e8\u6570\u636e\u5e93\u3001\u73b0\u6709\u7684RDD\u7b49\u5404\u79cd\u6570\u636e\u6e90\u6784\u5efa\u3002<\/p>\n<p>\u5728\u4ee5\u4e0b\u793a\u4f8b\u4e2d\uff0c\u5c55\u793a\u4e86\u5982\u4f55\u4f7f\u7528Python\u6784\u5efa\u6570\u636e\u6846\u3002\u7c7b\u4f3c\u7684API\u4e5f\u53ef\u5728Scala\u3001Java\u548cR\u4e2d\u4f7f\u7528\u3002<\/p>\n<pre class=\"post-pre\"><code><span class=\"c1\"># Constructs a DataFrame from the users table in Hive.\r\n<\/span><span class=\"n\">users<\/span> <span class=\"o\">=<\/span> <span class=\"n\">context<\/span><span class=\"p\">.<\/span><span class=\"n\">table<\/span><span class=\"p\">(<\/span><span class=\"s\">\"users\"<\/span><span class=\"p\">)<\/span>\r\n\r\n<span class=\"c1\"># from JSON files in S3\r\n<\/span><span class=\"n\">logs<\/span> <span class=\"o\">=<\/span> <span class=\"n\">context<\/span><span class=\"p\">.<\/span><span class=\"n\">load<\/span><span class=\"p\">(<\/span><span class=\"s\">\"s3n:\/\/path\/to\/data.json\"<\/span><span class=\"p\">,<\/span> <span class=\"s\">\"json\"<\/span><span class=\"p\">)<\/span>\r\n<\/code><\/pre>\n<h1>\u600e\u6837\u4f7f\u7528\u6570\u636e\u6846\uff08Data Frame\uff09\u5462\uff1f<\/h1>\n<p>\u5728\u6784\u5efa\u5b8c\u6210\u4e4b\u540e\uff0c\u6570\u636e\u6846\u5c06\u63d0\u4f9b\u7279\u5b9a\u9886\u57df\u8bed\u8a00\u7528\u4e8e\u5206\u5e03\u5f0f\u6570\u636e\u5904\u7406\u3002\u4ee5\u4e0b\u793a\u4f8b\u5c55\u793a\u4e86\u5982\u4f55\u4f7f\u7528\u6570\u636e\u6846\u6765\u64cd\u4f5c\u5927\u89c4\u6a21\u7528\u6237\u7684\u4eba\u53e3\u7edf\u8ba1\u6570\u636e\u3002<\/p>\n<pre class=\"post-pre\"><code><span class=\"c1\"># Create a new DataFrame that contains \u201cyoung users\u201d only\r\n<\/span><span class=\"n\">young<\/span> <span class=\"o\">=<\/span> <span class=\"n\">users<\/span><span class=\"p\">.<\/span><span class=\"nb\">filter<\/span><span class=\"p\">(<\/span><span class=\"n\">users<\/span><span class=\"p\">.<\/span><span class=\"n\">age<\/span> <span class=\"o\">&lt;<\/span> <span class=\"mi\">21<\/span><span class=\"p\">)<\/span>\r\n\r\n<span class=\"c1\"># Alternatively, using Pandas-like syntax\r\n<\/span><span class=\"n\">young<\/span> <span class=\"o\">=<\/span> <span class=\"n\">users<\/span><span class=\"p\">[<\/span><span class=\"n\">users<\/span><span class=\"p\">.<\/span><span class=\"n\">age<\/span> <span class=\"o\">&lt;<\/span> <span class=\"mi\">21<\/span><span class=\"p\">]<\/span>\r\n\r\n<span class=\"c1\"># Increment everybody\u2019s age by 1\r\n<\/span><span class=\"n\">young<\/span><span class=\"p\">.<\/span><span class=\"n\">select<\/span><span class=\"p\">(<\/span><span class=\"n\">young<\/span><span class=\"p\">.<\/span><span class=\"n\">name<\/span><span class=\"p\">,<\/span> <span class=\"n\">young<\/span><span class=\"p\">.<\/span><span class=\"n\">age<\/span> <span class=\"o\">+<\/span> <span class=\"mi\">1<\/span><span class=\"p\">)<\/span>\r\n\r\n<span class=\"c1\"># Count the number of young users by gender\r\n<\/span><span class=\"n\">young<\/span><span class=\"p\">.<\/span><span class=\"n\">groupBy<\/span><span class=\"p\">(<\/span><span class=\"s\">\"gender\"<\/span><span class=\"p\">).<\/span><span class=\"n\">count<\/span><span class=\"p\">()<\/span>\r\n\r\n<span class=\"c1\"># Join young users with another DataFrame called logs\r\n<\/span><span class=\"n\">young<\/span><span class=\"p\">.<\/span><span class=\"n\">join<\/span><span class=\"p\">(<\/span><span class=\"n\">logs<\/span><span class=\"p\">,<\/span> <span class=\"n\">logs<\/span><span class=\"p\">.<\/span><span class=\"n\">userId<\/span> <span class=\"o\">==<\/span> <span class=\"n\">users<\/span><span class=\"p\">.<\/span><span class=\"n\">userId<\/span><span class=\"p\">,<\/span> <span class=\"s\">\"left_outer\"<\/span><span class=\"p\">)<\/span>\r\n<\/code><\/pre>\n<p>\u901a\u8fc7\u4f7f\u7528Spark SQL\uff0c\u60a8\u53ef\u4ee5\u5728\u64cd\u4f5c\u6570\u636e\u5e27\u65f6\u4f7f\u7528SQL\u3002\u5728\u4e0b\u9762\u7684\u793a\u4f8b\u4e2d\uff0c\u6211\u4eec\u8ba1\u7b97\u4e86young\u6570\u636e\u5e27\u4e2d\u7684\u7528\u6237\u6570\u91cf\u3002<\/p>\n<pre class=\"post-pre\"><code><span class=\"n\">young<\/span><span class=\"p\">.<\/span><span class=\"n\">registerTempTable<\/span><span class=\"p\">(<\/span><span class=\"s\">\"young\"<\/span><span class=\"p\">)<\/span>\r\n<span class=\"n\">context<\/span><span class=\"p\">.<\/span><span class=\"n\">sql<\/span><span class=\"p\">(<\/span><span class=\"s\">\"SELECT count(*) FROM young\"<\/span><span class=\"p\">)<\/span>\r\n<\/code><\/pre>\n<p>\u5728Python\u4e2d\uff0c\u60a8\u53ef\u4ee5\u81ea\u7531\u5730\u5c06Pandas\u6570\u636e\u6846\u548cSpark\u6570\u636e\u6846\u8fdb\u884c\u8f6c\u6362\u3002<\/p>\n<pre class=\"post-pre\"><code><span class=\"c1\"># Convert Spark DataFrame to Pandas\r\n<\/span><span class=\"n\">pandas_df<\/span> <span class=\"o\">=<\/span> <span class=\"n\">young<\/span><span class=\"p\">.<\/span><span class=\"n\">toPandas<\/span><span class=\"p\">()<\/span>\r\n\r\n<span class=\"c1\"># Create a Spark DataFrame from Pandas\r\n<\/span><span class=\"n\">spark_df<\/span> <span class=\"o\">=<\/span> <span class=\"n\">context<\/span><span class=\"p\">.<\/span><span class=\"n\">createDataFrame<\/span><span class=\"p\">(<\/span><span class=\"n\">pandas_df<\/span><span class=\"p\">)<\/span>\r\n<\/code><\/pre>\n<p>\u6570\u636e\u5e27\u4e0eRDD\u7c7b\u4f3c\uff0c\u4ee5\u60f0\u6027\u65b9\u5f0f\u8fdb\u884c\u8bc4\u4f30\u3002\u4e5f\u5c31\u662f\u8bf4\uff0c\u53ea\u6709\u5728\u9700\u8981\u6267\u884c\u64cd\u4f5c\uff08\u4f8b\u5982\u663e\u793a\u7ed3\u679c\uff0c\u4fdd\u5b58\u8f93\u51fa\uff09\u65f6\u624d\u8fdb\u884c\u8ba1\u7b97\u3002\u901a\u8fc7\u8fd9\u79cd\u65b9\u5f0f\uff0c\u53ef\u4ee5\u5e94\u7528\u5728\u201c\u5185\u90e8\u5904\u7406\uff1a\u667a\u80fd\u4f18\u5316\u548c\u4ee3\u7801\u751f\u6210\u201d\u4e2d\u4ecb\u7ecd\u7684\u8c13\u8bcd\u63a8\u9001\u548c\u5b57\u8282\u7801\u751f\u6210\u7b49\u6280\u672f\u6765\u4f18\u5316\u5904\u7406\u3002\u6240\u6709\u6570\u636e\u5e27\u64cd\u4f5c\u90fd\u81ea\u52a8\u5e76\u884c\u5206\u5e03\u5728\u96c6\u7fa4\u4e0a\u3002<\/p>\n<h1>\u6570\u636e\u683c\u5f0f\u548c\u6570\u636e\u6e90\u7684\u652f\u6301\u3002<\/h1>\n<div><img decoding=\"async\" class=\"post-images\" title=\"\" src=\"https:\/\/cdn.silicloud.com\/blog-img\/blog\/img\/657d91f9913a08637a6e127e\/20-0.png\" alt=\"\" \/><\/div>\n<pre class=\"post-pre\"><code><span class=\"n\">users<\/span> <span class=\"o\">=<\/span> <span class=\"n\">context<\/span><span class=\"p\">.<\/span><span class=\"n\">jdbc<\/span><span class=\"p\">(<\/span><span class=\"s\">\"jdbc:postgresql:production\"<\/span><span class=\"p\">,<\/span> <span class=\"s\">\"users\"<\/span><span class=\"p\">)<\/span>\r\n<span class=\"n\">logs<\/span> <span class=\"o\">=<\/span> <span class=\"n\">context<\/span><span class=\"p\">.<\/span><span class=\"n\">load<\/span><span class=\"p\">(<\/span><span class=\"s\">\"\/path\/to\/traffic.log\"<\/span><span class=\"p\">)<\/span>\r\n<span class=\"n\">logs<\/span><span class=\"p\">.<\/span><span class=\"n\">join<\/span><span class=\"p\">(<\/span><span class=\"n\">users<\/span><span class=\"p\">,<\/span> <span class=\"n\">logs<\/span><span class=\"p\">.<\/span><span class=\"n\">userId<\/span> <span class=\"o\">==<\/span> <span class=\"n\">users<\/span><span class=\"p\">.<\/span><span class=\"n\">userId<\/span><span class=\"p\">,<\/span> <span class=\"s\">\"left_outer\"<\/span><span class=\"p\">)<\/span> \\\r\n<span class=\"p\">.<\/span><span class=\"n\">groupBy<\/span><span class=\"p\">(<\/span><span class=\"s\">\"userId\"<\/span><span class=\"p\">).<\/span><span class=\"n\">agg<\/span><span class=\"p\">({<\/span><span class=\"s\">\"*\"<\/span><span class=\"p\">:<\/span> <span class=\"s\">\"count\"<\/span><span class=\"p\">})<\/span>\r\n<\/code><\/pre>\n<h1>\u5e94\u7528\u7a0b\u5e8f\uff1a\u5148\u8fdb\u7684\u5206\u6790\u3001\u673a\u5668\u5b66\u4e60<\/h1>\n<p>\u6570\u636e\u79d1\u5b66\u5bb6\u6b63\u5728\u5f00\u59cb\u5229\u7528\u6bd4\u8fde\u63a5\u548c\u805a\u5408\u66f4\u9ad8\u7ea7\u7684\u6280\u672f\u3002\u4e3a\u4e86\u5e94\u5bf9\u8fd9\u4e00\u8d8b\u52bf\uff0c\u60a8\u53ef\u4ee5\u76f4\u63a5\u5728MLlib\u7684\u673a\u5668\u5b66\u4e60\u7ba1\u9053API\u4e2d\u4f7f\u7528\u6570\u636e\u6846\u67b6\u3002\u6b64\u5916\uff0c\u60a8\u8fd8\u53ef\u4ee5\u5bf9\u6570\u636e\u6846\u67b6\u5e94\u7528\u4efb\u610f\u590d\u6742\u7684\u7528\u6237\u51fd\u6570\u3002<\/p>\n<p>\u53ef\u4ee5\u4f7f\u7528MLlib\u7684\u65b0\u7ba1\u9053API\u6765\u6307\u5b9a\u6700\u5e38\u89c1\u7684\u5148\u8fdb\u5206\u6790\u4efb\u52a1\u3002\u4f8b\u5982\uff0c\u4e0b\u9762\u7684\u4ee3\u7801\u6784\u5efa\u4e86\u4e00\u4e2a\u7b80\u5355\u7684\u6587\u672c\u5206\u7c7b\u7ba1\u9053\uff0c\u7531\u5206\u8bcd\u5668\u3001\u54c8\u5e0c\u5355\u8bcd\u9891\u7387\u7279\u5f81\u63d0\u53d6\u5668\u548c\u903b\u8f91\u56de\u5f52\u7ec4\u6210\u3002<\/p>\n<pre class=\"post-pre\"><code><span class=\"n\">tokenizer<\/span> <span class=\"o\">=<\/span> <span class=\"n\">Tokenizer<\/span><span class=\"p\">(<\/span><span class=\"n\">inputCol<\/span><span class=\"o\">=<\/span><span class=\"s\">\"text\"<\/span><span class=\"p\">,<\/span> <span class=\"n\">outputCol<\/span><span class=\"o\">=<\/span><span class=\"s\">\"words\"<\/span><span class=\"p\">)<\/span>\r\n<span class=\"n\">hashingTF<\/span> <span class=\"o\">=<\/span> <span class=\"n\">HashingTF<\/span><span class=\"p\">(<\/span><span class=\"n\">inputCol<\/span><span class=\"o\">=<\/span><span class=\"s\">\"words\"<\/span><span class=\"p\">,<\/span> <span class=\"n\">outputCol<\/span><span class=\"o\">=<\/span><span class=\"s\">\"features\"<\/span><span class=\"p\">)<\/span>\r\n<span class=\"n\">lr<\/span> <span class=\"o\">=<\/span> <span class=\"n\">LogisticRegression<\/span><span class=\"p\">(<\/span><span class=\"n\">maxIter<\/span><span class=\"o\">=<\/span><span class=\"mi\">10<\/span><span class=\"p\">,<\/span> <span class=\"n\">regParam<\/span><span class=\"o\">=<\/span><span class=\"mf\">0.01<\/span><span class=\"p\">)<\/span>\r\n<span class=\"n\">pipeline<\/span> <span class=\"o\">=<\/span> <span class=\"n\">Pipeline<\/span><span class=\"p\">(<\/span><span class=\"n\">stages<\/span><span class=\"o\">=<\/span><span class=\"p\">[<\/span><span class=\"n\">tokenizer<\/span><span class=\"p\">,<\/span> <span class=\"n\">hashingTF<\/span><span class=\"p\">,<\/span> <span class=\"n\">lr<\/span><span class=\"p\">])<\/span>\r\n<\/code><\/pre>\n<p>\u4e00\u65e6\u5efa\u7acb\u4e86\u6570\u636e\u7ba1\u9053\uff0c\u5c31\u53ef\u4ee5\u76f4\u63a5\u5728\u6570\u636e\u6846\u4e0a\u8fdb\u884c\u8bad\u7ec3\u3002<\/p>\n<pre class=\"post-pre\"><code><span class=\"n\">df<\/span> <span class=\"o\">=<\/span> <span class=\"n\">context<\/span><span class=\"p\">.<\/span><span class=\"n\">load<\/span><span class=\"p\">(<\/span><span class=\"s\">\"\/path\/to\/data\"<\/span><span class=\"p\">)<\/span>\r\n<span class=\"n\">model<\/span> <span class=\"o\">=<\/span> <span class=\"n\">pipeline<\/span><span class=\"p\">.<\/span><span class=\"n\">fit<\/span><span class=\"p\">(<\/span><span class=\"n\">df<\/span><span class=\"p\">)<\/span>\r\n<\/code><\/pre>\n<p>\u5728\u6bd4\u673a\u5668\u5b66\u4e60\u7ba1\u9053API\u66f4\u4e3a\u590d\u6742\u7684\u4efb\u52a1\u4e2d\uff0c\u5e94\u7528\u7a0b\u5e8f\u53ef\u4ee5\u5bf9\u6570\u636e\u6846\u5e94\u7528\u4efb\u610f\u51fd\u6570\u3002\u6b64\u5916\uff0c\u8fd8\u53ef\u4ee5\u4f7f\u7528Spark\u7684\u73b0\u6709RDD API\u8fdb\u884c\u64cd\u4f5c\u3002\u4e0b\u9762\u7684\u4ee3\u7801\u7247\u6bb5\u6f14\u793a\u4e86\u5bf9\u6570\u636e\u6846\u7684bio\u5217\u6267\u884c\u7c7b\u4f3c\u4e8e\u5927\u6570\u636e\u4e2d\u7684&#8221;Hello World&#8221;\u7684\u8bcd\u9891\u7edf\u8ba1\u4efb\u52a1\u3002<\/p>\n<pre class=\"post-pre\"><code><span class=\"n\">df<\/span> <span class=\"o\">=<\/span> <span class=\"n\">context<\/span><span class=\"p\">.<\/span><span class=\"n\">load<\/span><span class=\"p\">(<\/span><span class=\"s\">\"\/path\/to\/people.json\"<\/span><span class=\"p\">)<\/span>\r\n<span class=\"c1\"># RDD-style methods such as map, flatMap are available on DataFrames\r\n# Split the bio text into multiple words.\r\n<\/span><span class=\"n\">words<\/span> <span class=\"o\">=<\/span> <span class=\"n\">df<\/span><span class=\"p\">.<\/span><span class=\"n\">select<\/span><span class=\"p\">(<\/span><span class=\"s\">\"bio\"<\/span><span class=\"p\">).<\/span><span class=\"n\">flatMap<\/span><span class=\"p\">(<\/span><span class=\"k\">lambda<\/span> <span class=\"n\">row<\/span><span class=\"p\">:<\/span> <span class=\"n\">row<\/span><span class=\"p\">.<\/span><span class=\"n\">bio<\/span><span class=\"p\">.<\/span><span class=\"n\">split<\/span><span class=\"p\">(<\/span><span class=\"s\">\" \"<\/span><span class=\"p\">))<\/span>\r\n<span class=\"c1\"># Create a new DataFrame to count the number of words\r\n<\/span><span class=\"n\">words_df<\/span> <span class=\"o\">=<\/span> <span class=\"n\">words<\/span><span class=\"p\">.<\/span><span class=\"nb\">map<\/span><span class=\"p\">(<\/span><span class=\"k\">lambda<\/span> <span class=\"n\">w<\/span><span class=\"p\">:<\/span> <span class=\"n\">Row<\/span><span class=\"p\">(<\/span><span class=\"n\">word<\/span><span class=\"o\">=<\/span><span class=\"n\">w<\/span><span class=\"p\">,<\/span> <span class=\"n\">cnt<\/span><span class=\"o\">=<\/span><span class=\"mi\">1<\/span><span class=\"p\">)).<\/span><span class=\"n\">toDF<\/span><span class=\"p\">()<\/span>\r\n<span class=\"n\">word_counts<\/span> <span class=\"o\">=<\/span> <span class=\"n\">words_df<\/span><span class=\"p\">.<\/span><span class=\"n\">groupBy<\/span><span class=\"p\">(<\/span><span class=\"s\">\"word\"<\/span><span class=\"p\">).<\/span><span class=\"nb\">sum<\/span><span class=\"p\">()<\/span>\r\n<\/code><\/pre>\n<h1>\u5185\u90e8\u5904\u7406\uff1a\u667a\u80fd\u4f18\u5316\u548c\u4ee3\u7801\u751f\u6210<\/h1>\n<p>\u4e0eR\u6216Python\u4e2d\u7684\u7acb\u5373\u8bc4\u4f30\u6570\u636e\u6846\u4e0d\u540c\uff0cSpark\u7684\u6570\u636e\u6846\u5177\u6709\u81ea\u8eab\u7684\u6267\u884c\u8fc7\u7a0b\uff0c\u5e76\u7531\u67e5\u8be2\u4f18\u5316\u5668\u81ea\u52a8\u4f18\u5316\u3002\u5728\u5bf9\u6570\u636e\u6846\u8fdb\u884c\u8ba1\u7b97\u4e4b\u524d\uff0cCatalyst\u4f18\u5316\u5668\u4f1a\u5c06\u6784\u5efa\u6570\u636e\u6846\u6240\u9700\u7684\u64cd\u4f5c\u7f16\u8bd1\u4e3a\u7269\u7406\u6267\u884c\u8ba1\u5212\u3002\u4f18\u5316\u5668\u53ef\u4ee5\u7406\u89e3\u64cd\u4f5c\u7684\u8bed\u4e49\u548c\u6570\u636e\u7ed3\u6784\uff0c\u4ece\u800c\u80fd\u591f\u505a\u51fa\u660e\u667a\u7684\u51b3\u7b56\u4ee5\u52a0\u5feb\u5904\u7406\u901f\u5ea6\u3002<\/p>\n<p>\u5728\u9ad8\u7ea7\u6c34\u5e73\u4e0a\uff0c\u5b58\u5728\u7740\u4e24\u79cd\u7c7b\u578b\u7684\u4f18\u5316\u3002\u9996\u5148\uff0cCatalyst\u5e94\u7528\u4e86\u903b\u8f91\u4f18\u5316\uff0c\u4f8b\u5982\u8c13\u8bcd\u4e0b\u63a8\u3002\u4f18\u5316\u5668\u4f1a\u5c06\u8c13\u8bcd\u8fc7\u6ee4\u5668\u4e0b\u63a8\u5230\u6570\u636e\u6e90\u4e2d\uff0c\u4ee5\u4fbf\u8df3\u8fc7\u4e0d\u9700\u8981\u8fdb\u884c\u7269\u7406\u6267\u884c\u7684\u6570\u636e\u3002\u5bf9\u4e8eParquet\u6587\u4ef6\u800c\u8a00\uff0c\u53ef\u4ee5\u8df3\u8fc7\u6574\u4e2a\u6570\u636e\u5757\uff0c\u5e76\u901a\u8fc7\u5b57\u5178\u7f16\u7801\u5c06\u5b57\u7b26\u4e32\u6bd4\u8f83\u8f6c\u6362\u4e3a\u66f4\u4f4e\u6210\u672c\u7684\u6574\u6570\u6bd4\u8f83\u3002\u5bf9\u4e8e\u5173\u7cfb\u578b\u6570\u636e\u5e93\u800c\u8a00\uff0c\u8c13\u8bcd\u4f1a\u88ab\u4e0b\u63a8\u5230\u5916\u90e8\u6570\u636e\u5e93\uff0c\u5e76\u53ef\u4ee5\u51cf\u5c11\u6570\u636e\u6d41\u91cf\u3002<\/p>\n<p>\u4e0b\u4e00\u6b65\uff0cCatalyst\u4f1a\u5c06\u64cd\u4f5c\u7f16\u8bd1\u6210\u7269\u7406\u6267\u884c\u8ba1\u5212\uff0c\u5e76\u751f\u6210\u76f8\u5e94\u7684JVM\u5b57\u8282\u7801\uff0c\u4ee5\u4fbf\u4e8e\u4f18\u5316\u6bd4\u624b\u5199\u4ee3\u7801\u66f4\u597d\u3002\u4f8b\u5982\uff0c\u4e3a\u4e86\u51cf\u5c11\u7f51\u7edc\u6d41\u91cf\uff0c\u53ef\u4ee5\u9002\u5f53\u9009\u62e9\u5e7f\u64ad\u8fde\u63a5\u6216\u6d17\u724c\u8fde\u63a5\u3002\u8fd8\u4f1a\u8fdb\u884c\u4f4e\u7ea7\u522b\u7684\u4f18\u5316\uff0c\u4f8b\u5982\u6392\u9664\u9ad8\u6210\u672c\u5bf9\u8c61\u5206\u914d\u548c\u51cf\u5c11\u865a\u62df\u51fd\u6570\u8c03\u7528\u7b49\u3002\u56e0\u6b64\uff0c\u901a\u8fc7\u5c06\u73b0\u6709\u7684Spark\u7a0b\u5e8f\u8fc1\u79fb\u5230\u6570\u636e\u6846\u67b6\uff0c\u53ef\u4ee5\u671f\u671b\u6027\u80fd\u6539\u8fdb\u3002<\/p>\n<div><img decoding=\"async\" class=\"post-images\" title=\"\" src=\"https:\/\/cdn.silicloud.com\/blog-img\/blog\/img\/657d91f9913a08637a6e127e\/34-0.png\" alt=\"\" \/><\/div>\n<p>\u6570\u636e\u6846\u67b6\u53d7\u5230\u4e86\u4e4b\u524d\u7684\u5206\u5e03\u5f0f\u6570\u636e\u6846\u67b6\u9879\u76eeAdatao\u7684DDF\u548cAyasdi\u7684BigDF\u7684\u542f\u53d1\u3002\u7136\u800c\uff0c\u4e0e\u8fd9\u4e9b\u9879\u76ee\u76f8\u6bd4\uff0c\u6570\u636e\u6846\u67b6\u7684\u4e3b\u8981\u533a\u522b\u662f\u5b83\u7ecf\u8fc7Catalyst\u4f18\u5316\u5668\uff0c\u5e76\u4e14\u50cfSpark SQL\u67e5\u8be2\u4e00\u6837\u8fdb\u884c\u4f18\u5316\u6267\u884c\u3002\u6211\u4eec\u4e0d\u65ad\u6539\u8fdbCatalyst\u4f18\u5316\u5668\uff0c\u4f7f\u5f97\u5f15\u64ce\u66f4\u52a0\u667a\u80fd\u5316\uff0c\u5e76\u4e14\u968f\u7740\u65b0\u7684Spark\u53d1\u5e03\uff0c\u5e94\u7528\u7a0b\u5e8f\u4e5f\u5f97\u4ee5\u52a0\u901f\u3002<\/p>\n<p>Databricks\u7684\u6570\u636e\u79d1\u5b66\u56e2\u961f\u6b63\u5728\u4f7f\u7528\u6570\u636e\u5e27API\u6765\u6784\u5efa\u5185\u90e8\u7684\u6570\u636e\u7ba1\u9053\u3002\u6570\u636e\u5e27API\u7b80\u5316\u4e86\u4f7f\u7528\u7684Spark\u7a0b\u5e8f\uff0c\u4f7f\u5176\u66f4\u6613\u4e8e\u7406\u89e3\uff0c\u5e76\u63d0\u9ad8\u4e86\u6027\u80fd\u3002\u6211\u4eec\u8ba4\u4e3a\u8fd9\u975e\u5e38\u4ee4\u4eba\u9ad8\u5174\uff0c\u5e76\u76f8\u4fe1\u66f4\u591a\u7684\u7528\u6237\u5c06\u80fd\u591f\u8bbf\u95ee\u5927\u6570\u636e\u5904\u7406\u3002<\/p>\n<p>\u8fd9\u4e2aAPI\u5c06\u4f5c\u4e3aSpark 1.3\u7684\u4e00\u90e8\u5206\u4e8e2015\u5e743\u6708\u4e0a\u65ec\u53d1\u5e03\u3002\u5982\u679c\u60a8\u8feb\u4e0d\u53ca\u5f85\uff0c\u53ef\u4ee5\u5c1d\u8bd5\u5728Github\u4e0a\u4f7f\u7528Spark\u3002<\/p>\n<p>\u6ca1\u6709\u4f7f\u7528\u8fc7\u5f80\u7684\u6570\u636e\u6846\u67b6\u5b9e\u73b0\uff0c\u662f\u65e0\u6cd5\u5b9e\u73b0\u6b64\u529f\u80fd\u7684\u3002\u5bf9R\u3001Pandas\u3001DDF\u3001BigDF\u7684\u5f00\u53d1\u8005\u4eec\u8868\u793a\u611f\u8c22\u3002<\/p>\n<h3>Databricks \u514d\u8d39\u8bd5\u7528<\/h3>\n<p>Databricks \u514d\u8d39\u8bd5\u7528<\/p>\n","protected":false},"excerpt":{"rendered":"<p>\u4ee5\u4e0b\u662f\u300aApache Spark\u6570\u636e\u6846\u67b6\u7528\u4e8e\u5927\u89c4\u6a21\u6570\u636e\u79d1\u5b66\u300b\u7684\u7ffb\u8bd1\u3002 \u7531\u4e8e\u8fd9\u662f2015\u5e742\u6708\u7684\u6587\u7ae0\uff0c\u6240\u4ee5\u53ef\u80fd\u5b58 [&hellip;]<\/p>\n","protected":false},"author":4,"featured_media":0,"comment_status":"closed","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[1],"tags":[],"class_list":["post-50537","post","type-post","status-publish","format-standard","hentry","category-uncategorized"],"yoast_head":"<!-- This site is optimized with the Yoast SEO Premium plugin v21.5 (Yoast SEO v21.5) - https:\/\/yoast.com\/wordpress\/plugins\/seo\/ -->\n<title>\u4ecb\u7ecd\u7528\u4e8e\u5927\u89c4\u6a21\u6570\u636e\u79d1\u5b66\u7684Apache Spark\u6570\u636e\u6846\u67b6 - Blog - Silicon Cloud<\/title>\n<meta name=\"robots\" content=\"index, follow, max-snippet:-1, max-image-preview:large, max-video-preview:-1\" \/>\n<link rel=\"canonical\" href=\"https:\/\/www.silicloud.com\/zh\/blog\/\u4ecb\u7ecd\u7528\u4e8e\u5927\u89c4\u6a21\u6570\u636e\u79d1\u5b66\u7684apache-spark\u6570\u636e\u6846\u67b6\u3002\/\" \/>\n<meta property=\"og:locale\" content=\"zh_CN\" \/>\n<meta property=\"og:type\" content=\"article\" \/>\n<meta property=\"og:title\" content=\"\u4ecb\u7ecd\u7528\u4e8e\u5927\u89c4\u6a21\u6570\u636e\u79d1\u5b66\u7684Apache Spark\u6570\u636e\u6846\u67b6\" \/>\n<meta property=\"og:description\" content=\"\u4ee5\u4e0b\u662f\u300aApache Spark\u6570\u636e\u6846\u67b6\u7528\u4e8e\u5927\u89c4\u6a21\u6570\u636e\u79d1\u5b66\u300b\u7684\u7ffb\u8bd1\u3002 \u7531\u4e8e\u8fd9\u662f2015\u5e742\u6708\u7684\u6587\u7ae0\uff0c\u6240\u4ee5\u53ef\u80fd\u5b58 [&hellip;]\" \/>\n<meta property=\"og:url\" content=\"https:\/\/www.silicloud.com\/zh\/blog\/\u4ecb\u7ecd\u7528\u4e8e\u5927\u89c4\u6a21\u6570\u636e\u79d1\u5b66\u7684apache-spark\u6570\u636e\u6846\u67b6\u3002\/\" \/>\n<meta property=\"og:site_name\" content=\"Blog - Silicon Cloud\" \/>\n<meta property=\"article:published_time\" content=\"2022-11-22T14:15:02+00:00\" \/>\n<meta property=\"article:modified_time\" content=\"2024-04-30T01:27:02+00:00\" \/>\n<meta property=\"og:image\" content=\"https:\/\/cdn.silicloud.com\/blog-img\/blog\/img\/657d91f9913a08637a6e127e\/20-0.png\" \/>\n<meta name=\"author\" content=\"\u79d1, \u96c5\" \/>\n<meta name=\"twitter:card\" content=\"summary_large_image\" \/>\n<meta name=\"twitter:label1\" content=\"\u4f5c\u8005\" \/>\n\t<meta name=\"twitter:data1\" content=\"\u79d1, \u96c5\" \/>\n\t<meta name=\"twitter:label2\" content=\"\u9884\u8ba1\u9605\u8bfb\u65f6\u95f4\" \/>\n\t<meta name=\"twitter:data2\" content=\"2 \u5206\" \/>\n<script type=\"application\/ld+json\" class=\"yoast-schema-graph\">{\"@context\":\"https:\/\/schema.org\",\"@graph\":[{\"@type\":\"WebPage\",\"@id\":\"https:\/\/www.silicloud.com\/zh\/blog\/%e4%bb%8b%e7%bb%8d%e7%94%a8%e4%ba%8e%e5%a4%a7%e8%a7%84%e6%a8%a1%e6%95%b0%e6%8d%ae%e7%a7%91%e5%ad%a6%e7%9a%84apache-spark%e6%95%b0%e6%8d%ae%e6%a1%86%e6%9e%b6%e3%80%82\/\",\"url\":\"https:\/\/www.silicloud.com\/zh\/blog\/%e4%bb%8b%e7%bb%8d%e7%94%a8%e4%ba%8e%e5%a4%a7%e8%a7%84%e6%a8%a1%e6%95%b0%e6%8d%ae%e7%a7%91%e5%ad%a6%e7%9a%84apache-spark%e6%95%b0%e6%8d%ae%e6%a1%86%e6%9e%b6%e3%80%82\/\",\"name\":\"\u4ecb\u7ecd\u7528\u4e8e\u5927\u89c4\u6a21\u6570\u636e\u79d1\u5b66\u7684Apache Spark\u6570\u636e\u6846\u67b6 - Blog - Silicon Cloud\",\"isPartOf\":{\"@id\":\"https:\/\/www.silicloud.com\/zh\/blog\/#website\"},\"datePublished\":\"2022-11-22T14:15:02+00:00\",\"dateModified\":\"2024-04-30T01:27:02+00:00\",\"author\":{\"@id\":\"https:\/\/www.silicloud.com\/zh\/blog\/#\/schema\/person\/41e222757cdd2a3365361328bd79970a\"},\"breadcrumb\":{\"@id\":\"https:\/\/www.silicloud.com\/zh\/blog\/%e4%bb%8b%e7%bb%8d%e7%94%a8%e4%ba%8e%e5%a4%a7%e8%a7%84%e6%a8%a1%e6%95%b0%e6%8d%ae%e7%a7%91%e5%ad%a6%e7%9a%84apache-spark%e6%95%b0%e6%8d%ae%e6%a1%86%e6%9e%b6%e3%80%82\/#breadcrumb\"},\"inLanguage\":\"zh-Hans\",\"potentialAction\":[{\"@type\":\"ReadAction\",\"target\":[\"https:\/\/www.silicloud.com\/zh\/blog\/%e4%bb%8b%e7%bb%8d%e7%94%a8%e4%ba%8e%e5%a4%a7%e8%a7%84%e6%a8%a1%e6%95%b0%e6%8d%ae%e7%a7%91%e5%ad%a6%e7%9a%84apache-spark%e6%95%b0%e6%8d%ae%e6%a1%86%e6%9e%b6%e3%80%82\/\"]}]},{\"@type\":\"BreadcrumbList\",\"@id\":\"https:\/\/www.silicloud.com\/zh\/blog\/%e4%bb%8b%e7%bb%8d%e7%94%a8%e4%ba%8e%e5%a4%a7%e8%a7%84%e6%a8%a1%e6%95%b0%e6%8d%ae%e7%a7%91%e5%ad%a6%e7%9a%84apache-spark%e6%95%b0%e6%8d%ae%e6%a1%86%e6%9e%b6%e3%80%82\/#breadcrumb\",\"itemListElement\":[{\"@type\":\"ListItem\",\"position\":1,\"name\":\"\u9996\u9875\",\"item\":\"https:\/\/www.silicloud.com\/zh\/blog\/\"},{\"@type\":\"ListItem\",\"position\":2,\"name\":\"\u4ecb\u7ecd\u7528\u4e8e\u5927\u89c4\u6a21\u6570\u636e\u79d1\u5b66\u7684Apache Spark\u6570\u636e\u6846\u67b6\"}]},{\"@type\":\"WebSite\",\"@id\":\"https:\/\/www.silicloud.com\/zh\/blog\/#website\",\"url\":\"https:\/\/www.silicloud.com\/zh\/blog\/\",\"name\":\"Blog - Silicon Cloud\",\"description\":\"\",\"inLanguage\":\"zh-Hans\"},{\"@type\":\"Person\",\"@id\":\"https:\/\/www.silicloud.com\/zh\/blog\/#\/schema\/person\/41e222757cdd2a3365361328bd79970a\",\"name\":\"\u79d1, \u96c5\",\"image\":{\"@type\":\"ImageObject\",\"inLanguage\":\"zh-Hans\",\"@id\":\"https:\/\/www.silicloud.com\/zh\/blog\/#\/schema\/person\/image\/\",\"url\":\"https:\/\/secure.gravatar.com\/avatar\/1b2d3e00a7df03689797ebd4af8c5827ba5af936849a71050ec331f4cf902c5d?s=96&d=mm&r=g\",\"contentUrl\":\"https:\/\/secure.gravatar.com\/avatar\/1b2d3e00a7df03689797ebd4af8c5827ba5af936849a71050ec331f4cf902c5d?s=96&d=mm&r=g\",\"caption\":\"\u79d1, \u96c5\"},\"url\":\"https:\/\/www.silicloud.com\/zh\/blog\/author\/keya\/\"},{\"@type\":\"ImageObject\",\"inLanguage\":\"zh-Hans\",\"@id\":\"https:\/\/www.silicloud.com\/zh\/blog\/%e4%bb%8b%e7%bb%8d%e7%94%a8%e4%ba%8e%e5%a4%a7%e8%a7%84%e6%a8%a1%e6%95%b0%e6%8d%ae%e7%a7%91%e5%ad%a6%e7%9a%84apache-spark%e6%95%b0%e6%8d%ae%e6%a1%86%e6%9e%b6%e3%80%82\/#local-main-organization-logo\",\"url\":\"\",\"contentUrl\":\"\",\"caption\":\"Blog - Silicon Cloud\"}]}<\/script>\n<!-- \/ Yoast SEO Premium plugin. -->","yoast_head_json":{"title":"\u4ecb\u7ecd\u7528\u4e8e\u5927\u89c4\u6a21\u6570\u636e\u79d1\u5b66\u7684Apache Spark\u6570\u636e\u6846\u67b6 - Blog - Silicon Cloud","robots":{"index":"index","follow":"follow","max-snippet":"max-snippet:-1","max-image-preview":"max-image-preview:large","max-video-preview":"max-video-preview:-1"},"canonical":"https:\/\/www.silicloud.com\/zh\/blog\/\u4ecb\u7ecd\u7528\u4e8e\u5927\u89c4\u6a21\u6570\u636e\u79d1\u5b66\u7684apache-spark\u6570\u636e\u6846\u67b6\u3002\/","og_locale":"zh_CN","og_type":"article","og_title":"\u4ecb\u7ecd\u7528\u4e8e\u5927\u89c4\u6a21\u6570\u636e\u79d1\u5b66\u7684Apache Spark\u6570\u636e\u6846\u67b6","og_description":"\u4ee5\u4e0b\u662f\u300aApache Spark\u6570\u636e\u6846\u67b6\u7528\u4e8e\u5927\u89c4\u6a21\u6570\u636e\u79d1\u5b66\u300b\u7684\u7ffb\u8bd1\u3002 \u7531\u4e8e\u8fd9\u662f2015\u5e742\u6708\u7684\u6587\u7ae0\uff0c\u6240\u4ee5\u53ef\u80fd\u5b58 [&hellip;]","og_url":"https:\/\/www.silicloud.com\/zh\/blog\/\u4ecb\u7ecd\u7528\u4e8e\u5927\u89c4\u6a21\u6570\u636e\u79d1\u5b66\u7684apache-spark\u6570\u636e\u6846\u67b6\u3002\/","og_site_name":"Blog - Silicon Cloud","article_published_time":"2022-11-22T14:15:02+00:00","article_modified_time":"2024-04-30T01:27:02+00:00","og_image":[{"url":"https:\/\/cdn.silicloud.com\/blog-img\/blog\/img\/657d91f9913a08637a6e127e\/20-0.png"}],"author":"\u79d1, \u96c5","twitter_card":"summary_large_image","twitter_misc":{"\u4f5c\u8005":"\u79d1, \u96c5","\u9884\u8ba1\u9605\u8bfb\u65f6\u95f4":"2 \u5206"},"schema":{"@context":"https:\/\/schema.org","@graph":[{"@type":"WebPage","@id":"https:\/\/www.silicloud.com\/zh\/blog\/%e4%bb%8b%e7%bb%8d%e7%94%a8%e4%ba%8e%e5%a4%a7%e8%a7%84%e6%a8%a1%e6%95%b0%e6%8d%ae%e7%a7%91%e5%ad%a6%e7%9a%84apache-spark%e6%95%b0%e6%8d%ae%e6%a1%86%e6%9e%b6%e3%80%82\/","url":"https:\/\/www.silicloud.com\/zh\/blog\/%e4%bb%8b%e7%bb%8d%e7%94%a8%e4%ba%8e%e5%a4%a7%e8%a7%84%e6%a8%a1%e6%95%b0%e6%8d%ae%e7%a7%91%e5%ad%a6%e7%9a%84apache-spark%e6%95%b0%e6%8d%ae%e6%a1%86%e6%9e%b6%e3%80%82\/","name":"\u4ecb\u7ecd\u7528\u4e8e\u5927\u89c4\u6a21\u6570\u636e\u79d1\u5b66\u7684Apache Spark\u6570\u636e\u6846\u67b6 - Blog - Silicon Cloud","isPartOf":{"@id":"https:\/\/www.silicloud.com\/zh\/blog\/#website"},"datePublished":"2022-11-22T14:15:02+00:00","dateModified":"2024-04-30T01:27:02+00:00","author":{"@id":"https:\/\/www.silicloud.com\/zh\/blog\/#\/schema\/person\/41e222757cdd2a3365361328bd79970a"},"breadcrumb":{"@id":"https:\/\/www.silicloud.com\/zh\/blog\/%e4%bb%8b%e7%bb%8d%e7%94%a8%e4%ba%8e%e5%a4%a7%e8%a7%84%e6%a8%a1%e6%95%b0%e6%8d%ae%e7%a7%91%e5%ad%a6%e7%9a%84apache-spark%e6%95%b0%e6%8d%ae%e6%a1%86%e6%9e%b6%e3%80%82\/#breadcrumb"},"inLanguage":"zh-Hans","potentialAction":[{"@type":"ReadAction","target":["https:\/\/www.silicloud.com\/zh\/blog\/%e4%bb%8b%e7%bb%8d%e7%94%a8%e4%ba%8e%e5%a4%a7%e8%a7%84%e6%a8%a1%e6%95%b0%e6%8d%ae%e7%a7%91%e5%ad%a6%e7%9a%84apache-spark%e6%95%b0%e6%8d%ae%e6%a1%86%e6%9e%b6%e3%80%82\/"]}]},{"@type":"BreadcrumbList","@id":"https:\/\/www.silicloud.com\/zh\/blog\/%e4%bb%8b%e7%bb%8d%e7%94%a8%e4%ba%8e%e5%a4%a7%e8%a7%84%e6%a8%a1%e6%95%b0%e6%8d%ae%e7%a7%91%e5%ad%a6%e7%9a%84apache-spark%e6%95%b0%e6%8d%ae%e6%a1%86%e6%9e%b6%e3%80%82\/#breadcrumb","itemListElement":[{"@type":"ListItem","position":1,"name":"\u9996\u9875","item":"https:\/\/www.silicloud.com\/zh\/blog\/"},{"@type":"ListItem","position":2,"name":"\u4ecb\u7ecd\u7528\u4e8e\u5927\u89c4\u6a21\u6570\u636e\u79d1\u5b66\u7684Apache Spark\u6570\u636e\u6846\u67b6"}]},{"@type":"WebSite","@id":"https:\/\/www.silicloud.com\/zh\/blog\/#website","url":"https:\/\/www.silicloud.com\/zh\/blog\/","name":"Blog - Silicon Cloud","description":"","inLanguage":"zh-Hans"},{"@type":"Person","@id":"https:\/\/www.silicloud.com\/zh\/blog\/#\/schema\/person\/41e222757cdd2a3365361328bd79970a","name":"\u79d1, \u96c5","image":{"@type":"ImageObject","inLanguage":"zh-Hans","@id":"https:\/\/www.silicloud.com\/zh\/blog\/#\/schema\/person\/image\/","url":"https:\/\/secure.gravatar.com\/avatar\/1b2d3e00a7df03689797ebd4af8c5827ba5af936849a71050ec331f4cf902c5d?s=96&d=mm&r=g","contentUrl":"https:\/\/secure.gravatar.com\/avatar\/1b2d3e00a7df03689797ebd4af8c5827ba5af936849a71050ec331f4cf902c5d?s=96&d=mm&r=g","caption":"\u79d1, \u96c5"},"url":"https:\/\/www.silicloud.com\/zh\/blog\/author\/keya\/"},{"@type":"ImageObject","inLanguage":"zh-Hans","@id":"https:\/\/www.silicloud.com\/zh\/blog\/%e4%bb%8b%e7%bb%8d%e7%94%a8%e4%ba%8e%e5%a4%a7%e8%a7%84%e6%a8%a1%e6%95%b0%e6%8d%ae%e7%a7%91%e5%ad%a6%e7%9a%84apache-spark%e6%95%b0%e6%8d%ae%e6%a1%86%e6%9e%b6%e3%80%82\/#local-main-organization-logo","url":"","contentUrl":"","caption":"Blog - Silicon Cloud"}]}},"_links":{"self":[{"href":"https:\/\/www.silicloud.com\/zh\/blog\/wp-json\/wp\/v2\/posts\/50537","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/www.silicloud.com\/zh\/blog\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/www.silicloud.com\/zh\/blog\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/www.silicloud.com\/zh\/blog\/wp-json\/wp\/v2\/users\/4"}],"replies":[{"embeddable":true,"href":"https:\/\/www.silicloud.com\/zh\/blog\/wp-json\/wp\/v2\/comments?post=50537"}],"version-history":[{"count":2,"href":"https:\/\/www.silicloud.com\/zh\/blog\/wp-json\/wp\/v2\/posts\/50537\/revisions"}],"predecessor-version":[{"id":91080,"href":"https:\/\/www.silicloud.com\/zh\/blog\/wp-json\/wp\/v2\/posts\/50537\/revisions\/91080"}],"wp:attachment":[{"href":"https:\/\/www.silicloud.com\/zh\/blog\/wp-json\/wp\/v2\/media?parent=50537"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/www.silicloud.com\/zh\/blog\/wp-json\/wp\/v2\/categories?post=50537"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/www.silicloud.com\/zh\/blog\/wp-json\/wp\/v2\/tags?post=50537"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}