{"id":50456,"date":"2023-10-16T07:52:35","date_gmt":"2023-03-05T14:27:52","guid":{"rendered":"https:\/\/www.silicloud.com\/zh\/blog\/%e5%85%b3%e4%ba%8espark%ef%bc%88python%e3%80%81java%e3%80%81jvm%e3%80%81rdd%ef%bc%89\/"},"modified":"2024-05-04T04:28:46","modified_gmt":"2024-05-03T20:28:46","slug":"%e5%85%b3%e4%ba%8espark%ef%bc%88python%e3%80%81java%e3%80%81jvm%e3%80%81rdd%ef%bc%89","status":"publish","type":"post","link":"https:\/\/www.silicloud.com\/zh\/blog\/%e5%85%b3%e4%ba%8espark%ef%bc%88python%e3%80%81java%e3%80%81jvm%e3%80%81rdd%ef%bc%89\/","title":{"rendered":"\u5173\u4e8eSpark\uff08Python\u3001Java\u3001JVM\u3001RDD\uff09"},"content":{"rendered":"<h1>\u5173\u4e8eHadoop\u548cSpark\u7684\u5206\u5e03\u5f0f\u5904\u7406<\/h1>\n<ul class=\"post-ul\">\n<li style=\"list-style-type: none;\">\n<ul class=\"post-ul\">\u5de8\u5927\u30c7\u30fc\u30bf\u306e\u53d6\u308a\u6271\u3044\u3092\u76ee\u7684\u3068\u3057\u305f\u5206\u6563\u51e6\u7406\u306e\u30d5\u30ec\u30fc\u30e0\u30ef\u30fc\u30af<\/ul>\n<\/li>\n<\/ul>\n<p>&nbsp;<\/p>\n<ul class=\"post-ul\">\n<li style=\"list-style-type: none;\">\n<ul class=\"post-ul\">\u5206\u6563\u51e6\u7406\u306b\u3088\u3063\u3066\u30d3\u30c3\u30b0\u30c7\u30fc\u30bf\u3092\u9ad8\u901f\u306b\u51e6\u7406\u3059\u308b\u3053\u3068\u304c\u53ef\u80fd<\/ul>\n<\/li>\n<\/ul>\n<p>&nbsp;<\/p>\n<ul class=\"post-ul\">Hadoop\u306e\u5229\u7528\u8005\u306f\u81ea\u4f5c\u3057\u305f\u30c7\u30fc\u30bf\u51e6\u7406\u306e\u30d7\u30ed\u30b0\u30e9\u30e0\u3084\u4ed6\u8005\u304c\u958b\u767a\u3057\u305f\u30c4\u30fc\u30eb\u30d7\u30ed\u30b0\u30e9\u30e0\u3092Hadoop\u5185\u306b\u7d44\u307f\u8fbc\u3093\u3067\u30d3\u30c3\u30b0\u30c7\u30fc\u30bf\u51e6\u7406\u304c\u53ef\u80fd<\/ul>\n<h2>\u5728Hadoop\u4e0a\u8fd0\u884c\u7684\u6570\u636e\u5e93\u7ba1\u7406\u7cfb\u7edf(DBMS)\u3002<\/h2>\n<ul class=\"post-ul\">\n<li style=\"list-style-type: none;\">\n<ul class=\"post-ul\">Hive<\/ul>\n<\/li>\n<\/ul>\n<p>&nbsp;<\/p>\n<ul class=\"post-ul\">Impala<\/ul>\n<h2>\u5728Hadoop\u4e0a\u8fd0\u884c\u7684\u811a\u672c\u73af\u5883\u3002<\/h2>\n<ul class=\"post-ul\">Pig<\/ul>\n<p>Hadoop\u7684\u6574\u5408\u8f6f\u4ef6\u4f7f\u5927\u6570\u636e\u5904\u7406\u73af\u5883\u66f4\u52a0\u4fbf\u5229\u3002<br \/>\nSpark\u4e0eHadoop\u4e00\u6837\u662f\u5206\u5e03\u5f0f\u5904\u7406\u6846\u67b6\u3002<br \/>\nSpark\u662f\u4f7f\u7528Scala\u4f5c\u4e3a\u884d\u751f\u81eaJava\u7684\u8bed\u8a00\u5f00\u53d1\u7684\uff0c\u800cHadoop\u662f\u4f7f\u7528Java\u8bed\u8a00\u5f00\u53d1\u7684\u3002<\/p>\n<h3>Spark\u7684\u5185\u90e8\u5904\u7406\u65b9\u5f0f<\/h3>\n<ul class=\"post-ul\">\n<li style=\"list-style-type: none;\">\n<ul class=\"post-ul\">\u30c7\u30fc\u30bf\u3092\u30e1\u30e2\u30ea\u306b\u4fdd\u5b58\u3059\u308b\u3053\u3068\u3067\u5165\u51fa\u529b\u306e\u9ad8\u901f\u5316\u3092\u56f3\u308a\u51e6\u7406\u5168\u4f53\u306e\u5b9f\u884c\u901f\u5ea6\u3092\u5411\u4e0a\u3055\u305b\u3088\u3046\u3068\u3059\u308b\u53d6\u308a\u7d44\u307f\u304c\u3042\u308b\u3002<\/ul>\n<\/li>\n<\/ul>\n<p>&nbsp;<\/p>\n<ul class=\"post-ul\">\n<li style=\"list-style-type: none;\">\n<ul class=\"post-ul\">\u5229\u7528\u53ef\u80fd\u30e1\u30e2\u30ea\u304c\u67af\u6e07\u3057\u305f\u5834\u5408\u306b\u306f\u30c7\u30fc\u30bf\u3092\u30b9\u30c8\u30ec\u30fc\u30b8\u306b\u4fdd\u5b58\u3059\u308b\u30b1\u30fc\u30b9\u3082\u52d8\u6848<\/ul>\n<\/li>\n<\/ul>\n<p>&nbsp;<\/p>\n<ul class=\"post-ul\">\n<li style=\"list-style-type: none;\">\n<ul class=\"post-ul\">\u6a5f\u68b0\u5b66\u7fd2\u306e\u8a08\u7b97\u51e6\u7406\u306b\u52b9\u679c\u3092\u767a\u63ee\u3059\u308b<\/ul>\n<\/li>\n<\/ul>\n<p>&nbsp;<\/p>\n<ul class=\"post-ul\">\u5b9f\u969b\u7279\u5b9a\u306e\u30a2\u30d7\u30ea\u30b1\u30fc\u30b7\u30e7\u30f3\u306b\u95a2\u3059\u308b\u5b9f\u884c\u6027\u80fd\u306f\u3001Hadoop\u306eMapReduce\u51e6\u7406\u3068\u6bd4\u3079\u305f\u5834\u5408\u306e100\u500d<\/ul>\n<h3>\u5173\u4e8e\u300c\u6570\u636e\u5b58\u50a8\u4f4d\u7f6e\u300d\u7684\u9009\u62e9\u8303\u56f4\u5e7f\u6cdb\u3002<\/h3>\n<ul class=\"post-ul\">\n<li style=\"list-style-type: none;\">\n<ul class=\"post-ul\">Spark\u3067\u51e6\u7406\u3092\u884c\u3046\u30c7\u30fc\u30bf\u306f\u300c\u3044\u308d\u3044\u308d\u306a\u7a2e\u985e\u306e\u30c7\u30fc\u30bf\u7f6e\u304d\u5834\u300d\u306b\u683c\u7d0d\u53ef\u80fd<\/ul>\n<\/li>\n<\/ul>\n<p>&nbsp;<\/p>\n<ul class=\"post-ul\">Spark\u306f\u69d8\u3005\u306a\u30c7\u30fc\u30bf\u683c\u7d0d\u5834\u6240\u304b\u3089\u306e\u30c7\u30fc\u30bf\u5165\u51fa\u529b\u306b\u5bfe\u5fdc\u3057\u3066\u3044\u308b<\/ul>\n<blockquote><p>\u57fa\u672c\u4e0a\uff0cHadoop\u9700\u8981\u4e00\u500b\u540d\u70ba\u300cHadoop\u5206\u6563\u5f0f\u6587\u4ef6\u7cfb\u7d71\uff08HDFS\uff09\u300d\u7684\u5c08\u7528\u6587\u4ef6\u5b58\u5132\u4f4d\u7f6e\u4f5c\u70ba\u6578\u64da\u5b58\u5132\u4f4d\u7f6e\u3002<\/p><\/blockquote>\n<ul class=\"post-ul\">\n<li style=\"list-style-type: none;\">\n<ul class=\"post-ul\">Hadoop Distributed File System (HDFS)<\/ul>\n<\/li>\n<\/ul>\n<p>&nbsp;<\/p>\n<ul class=\"post-ul\">\n<li style=\"list-style-type: none;\">\n<ul class=\"post-ul\">Cassandra<\/ul>\n<\/li>\n<\/ul>\n<p>&nbsp;<\/p>\n<ul class=\"post-ul\">\n<li style=\"list-style-type: none;\">\n<ul class=\"post-ul\">OpenStack Swift<\/ul>\n<\/li>\n<\/ul>\n<p>&nbsp;<\/p>\n<ul class=\"post-ul\">Amazon S3<\/ul>\n<p>\u53ef\u4ee5\u5904\u7406\u4e0a\u8ff0\u7b49\u7684\u5b58\u50a8\u8bbe\u5907\u3002<\/p>\n<h3>\u5173\u4e8e\u7a0b\u5e8f\u65b9\u6cd5\u7684\u9009\u62e9\u5e7f\u5ea6<\/h3>\n<ul class=\"post-ul\">\n<li style=\"list-style-type: none;\">\n<ul class=\"post-ul\">Java(\u76f4\u63a5Hadoop\u3092\u5236\u5fa1\u3059\u308b\u4e8b\u304b\u3089\u3053\u306e\u65b9\u6cd5\u3092\u751fHadoop\u3068\u547c\u3093\u3060\u308a\u3057\u307e\u3059)<\/ul>\n<\/li>\n<\/ul>\n<p>&nbsp;<\/p>\n<ul class=\"post-ul\">\n<li style=\"list-style-type: none;\">\n<ul class=\"post-ul\">HiveQL(Hadoop+Hive)<\/ul>\n<\/li>\n<\/ul>\n<p>&nbsp;<\/p>\n<ul class=\"post-ul\">\n<li style=\"list-style-type: none;\">\n<ul class=\"post-ul\">Pig(Hadoop+Pig)<\/ul>\n<\/li>\n<\/ul>\n<p>&nbsp;<\/p>\n<ul class=\"post-ul\">Hadoop Streaming\u3092\u4f7f\u7528\u3059\u308b\u3053\u3068\u3067\u6a19\u6e96\u5165\u51fa\u529b\u3092\u4ecb\u3057\u3066Python\u306a\u3069\u304b\u3089\u5236\u5fa1<\/ul>\n<blockquote><p>\u901a\u8fc7\u4e0e&#8221;Hadoop\u4e0d\u540c\u7684\u8f6f\u4ef6&#8221;\u8fdb\u884c\u7f16\u7a0b\u662f\u5f88\u5e38\u89c1\u7684\u3002<br \/>\n\u5c3d\u7ba1Spark\u672c\u8eab\u662f\u7528Scala\u7f16\u7a0b\u7684\uff0c\u4f46\u5b83\u91c7\u7528\u4e86\u6bd4\u5176\u4ed6\u7f16\u7a0b\u8bed\u8a00\u66f4\u7d27\u5bc6\u7684\u65b9\u6cd5\u3002\u4e5f\u5c31\u662f\u8bf4\uff0c\u7528\u4e8e\u63a7\u5236Spark\u7684\u529f\u80fd\u4e0d\u4ec5\u9650\u4e8eScala\u3002<\/p><\/blockquote>\n<ul class=\"post-ul\">\n<li style=\"list-style-type: none;\">\n<ul class=\"post-ul\">Java<\/ul>\n<\/li>\n<\/ul>\n<p>&nbsp;<\/p>\n<ul class=\"post-ul\">\n<li style=\"list-style-type: none;\">\n<ul class=\"post-ul\">Python<\/ul>\n<\/li>\n<\/ul>\n<p>&nbsp;<\/p>\n<ul class=\"post-ul\">\n<li style=\"list-style-type: none;\">\n<ul class=\"post-ul\">R\u8a00\u8a9e<\/ul>\n<\/li>\n<\/ul>\n<p>&nbsp;<\/p>\n<ul class=\"post-ul\">Spark SQL<\/ul>\n<p>\u63d0\u4f9b\u4e86\u9002\u7528\u4e8e\u5404\u79cd\u7a0b\u5e8f\u8bed\u8a00\u7684API\u3002<\/p>\n<h2>Spark\u548cHadoop\u7684\u5173\u7cfb\u5e76\u975e\u7ade\u4e89\uff0c\u800c\u662f\u5171\u5b58\u3002\u7528\u6237\u53ef\u4ee5\u6709\u66f4\u5e7f\u6cdb\u7684\u9009\u62e9\u3002<\/h2>\n<p>* \u5728Hadoop\u5185\u4f7f\u7528Yarn\u8fdb\u884c\u63a7\u5236\u5e76\u5229\u7528Spark\u4e5f\u662f\u73b0\u5b9e\u7684\u9009\u62e9\u3002<br \/>\n* Spark\u4e5f\u652f\u6301\u5c06\u6570\u636e\u8f93\u5165\u8f93\u51fa\u5230HDFS\u3002<br \/>\n* \u76ee\u524d\uff0cSpark\u548cHadoop\u5b58\u5728\u5171\u5b58\u5173\u7cfb\u3002<\/p>\n<p>Spark\u5728HDFS\u4e0a\u63a7\u5236\u6570\u636e\uff0c\u4f7f\u5176\u4e0e\u4e24\u8005\u7684\u4eb2\u548c\u6027\u66f4\u52a0\u7d27\u5bc6\uff0c\u5305\u62ec\u73b0\u6709\u6570\u636e\u7684\u53ef\u91cd\u7528\u6027\u5728\u5185\u3002\u5728\u7ade\u4e89\u65b9\u9762\uff0c\u9002\u5f53\u7684\u8868\u8fbe\u662f\u201cSpark\u5904\u7406\u65b9\u5f0f\u4e0eMapReduce\u5904\u7406\u65b9\u5f0f\u7684\u7ade\u4e89\u201d\u3002<\/p>\n<h1>\u6709\u5173Apache Spark<\/h1>\n<ul class=\"post-ul\">\n<li style=\"list-style-type: none;\">\n<ul class=\"post-ul\">\u534a\u69cb\u9020\u5316\u30c7\u30fc\u30bf(https:\/\/jp.drinet.co.jp\/blog\/datamanagement\/semi-structured-data)<\/ul>\n<\/li>\n<\/ul>\n<p>&nbsp;<\/p>\n<ul class=\"post-ul\">\n<li style=\"list-style-type: none;\">\n<ul class=\"post-ul\">\u69cb\u9020\u5316\u30c7\u30fc\u30bf<\/ul>\n<\/li>\n<\/ul>\n<p>&nbsp;<\/p>\n<ul class=\"post-ul\">\n<li style=\"list-style-type: none;\">\n<ul class=\"post-ul\">\u30b9\u30c8\u30ea\u30fc\u30df\u30f3\u30b0\u30c7\u30fc\u30bf<\/ul>\n<\/li>\n<\/ul>\n<p>&nbsp;<\/p>\n<ul class=\"post-ul\">\n<li style=\"list-style-type: none;\">\n<ul class=\"post-ul\">\u6a5f\u68b0\u5b66\u7fd2<\/ul>\n<\/li>\n<\/ul>\n<p>&nbsp;<\/p>\n<ul class=\"post-ul\">\u30c7\u30fc\u30bf\u30b5\u30a4\u30a8\u30f3\u30b9<\/ul>\n<p>\u53ef\u4ee5\u4ece\u4e3b\u8282\u70b9\u4e0a\u7684\u4e00\u4e2a\u9a71\u52a8\u7a0b\u5e8f\u8fdb\u7a0b\uff08\u53ef\u4ee5\u5177\u6709\u591a\u4e2a\u5de5\u4f5c\uff09\u542f\u52a8\u3002<\/p>\n<p>\u53d1\u9001\u6307\u4ee4\u7ed9\u5206\u5e03\u5728\u591a\u4e2a\u5de5\u4f5c\u8282\u70b9\u4e0a\u7684Executor\u8fdb\u7a0b\uff08\u591a\u4e2a\u4efb\u52a1\uff09\u3002<\/p>\n<p>\u6709\u5411\u975e\u5faa\u73af\u56fe\u5b58\u5728\u65f6\uff0cSpark\u4f1a\u901a\u8fc7\u4f18\u5316\u4efb\u52a1\u8c03\u5ea6\u548c\u6267\u884c\u6765\u5904\u7406\u5b83\u4eec\u3002<\/p>\n<h1>RDD (\u8010\u969c\u5bb3\u6027\u5206\u6563\u6570\u636e\u96c6)\u53ef\u4ee5\u88ab\u89c6\u4e3a\u4e00\u79cd\u8010\u969c\u5bb3\u6027\u7684\u3001\u5206\u5e03\u5f0f\u7684\u6570\u636e\u96c6\u3002<\/h1>\n<p>\u8fd9\u4e2a\u9879\u76ee\u4e3b\u8981\u7531\u4e0d\u53ef\u53d8\u7684Java\u865a\u62df\u673a(JVM)\u5bf9\u8c61\u5206\u5e03\u5f0f\u96c6\u5408\u6784\u6210\u3002<br \/>\nPython\u6570\u636e\u4fdd\u5b58\u5728JVM\u5bf9\u8c61\u4e2d\u3002<br \/>\n\u901a\u8fc7\u4f7f\u7528\u8fd9\u4e9b\u5bf9\u8c61\uff0c\u53ef\u4ee5\u5728\u4efb\u4f55\u4f5c\u4e1a\u4e2d\u9ad8\u901f\u6267\u884c\u8ba1\u7b97\u64cd\u4f5c\u3002<br \/>\nRDD\u5229\u7528\u5185\u5b58\u6709\u6548\u5730\u8fdb\u884c\u8ba1\u7b97\uff0c\u7f13\u5b58\u548c\u5b58\u50a8\u3002<br \/>\n\u56e0\u6b64\uff0c\u4e0e\u5176\u4ed6\u4f20\u7edf\u6846\u67b6\u5982Apache Hadoop\u76f8\u6bd4\uff0c\u8ba1\u7b97\u901f\u5ea6\u63d0\u9ad8\u4e86\u51e0\u4e2a\u6570\u91cf\u7ea7\u3002<\/p>\n<h3>RDD\u7684\u529f\u80fd<\/h3>\n<blockquote><p>\u751f\u6210\u5730\u56fe<br \/>\n\u5408\u5e76<br \/>\n\u7b5b\u9009<br \/>\n&#8230;\u7b49\u7b49<\/p><\/blockquote>\n<p>RDD\u53ef\u4ee5\u5e76\u884c\u5730\u5e94\u7528\u6570\u636e\u8f6c\u6362\u548c\u8bb0\u5f55\u64cd\u4f5c\u3002\u56e0\u6b64\uff0c\u5b83\u5177\u6709\u66f4\u5feb\u7684\u901f\u5ea6\u548c\u66f4\u597d\u7684\u5bb9\u9519\u6027\u3002<br \/>\n\u901a\u8fc7\u6ce8\u518c\u8f6c\u6362\u64cd\u4f5c\uff0cRDD\u53ef\u4ee5\u63d0\u4f9b\u6570\u636e\u7684\u91cd\u5efa\u529f\u80fd\u3002<br \/>\n\u5982\u679c\u51fa\u73b0\u95ee\u9898\u5bfc\u81f4\u90e8\u5206\u6570\u636e\u4e22\u5931\uff0c\u53ef\u4ee5\u8fdb\u884c\u56de\u9000\u64cd\u4f5c\u3002\u5982\u679c\u6570\u636e\u4e22\u5931\uff0c\u53ef\u4ee5\u91cd\u65b0\u8ba1\u7b97\u3002<\/p>\n<h4>RDD\u64cd\u4f5c<\/h4>\n<ul class=\"post-ul\">\n<li style=\"list-style-type: none;\">\n<ul class=\"post-ul\">\u5909\u63db(\u65b0\u3057\u3044RDD\u30dd\u30a4\u30f3\u30bf\u30fc\u3092\u8fd4\u3059\u3002) \u2192 \u9045\u5ef6\u51e6\u7406<\/ul>\n<\/li>\n<\/ul>\n<p>&nbsp;<\/p>\n<ul class=\"post-ul\">\u30a2\u30af\u30b7\u30e7\u30f3(\u6f14\u7b97\u51e6\u7406\u3092\u884c\u3044\u3001\u5024\u3092\u30c9\u30e9\u30a4\u30d0\u30fc\u306b\u8fd4\u3059\u3002)\u77e2\u5370\u623b\u308a\u5024\u306e\u3053\u3068\uff1f<\/ul>\n<p>Spark\u7684\u6700\u5927\u4f18\u70b9\u662f\u80fd\u591f\u5e76\u884c\u5904\u7406\u3002<\/p>\n<p>\u4e3a\u4e86\u4e86\u89e3\u6570\u636e\u96c6\u7684\u60c5\u51b5\uff0c\u5728\u5206\u6790\u8fc7\u7a0b\u4e2d\u7ecf\u5e38\u4f7f\u7528\u7684\u6b65\u9aa4\u3002<br \/>\n1. \u8ba1\u7b97\u6bcf\u4e2a\u503c\u5728\u67d0\u4e00\u5217\u4e2d\u51fa\u73b0\u7684\u6b21\u6570\u3002<br \/>\n2. \u9009\u62e9\u4ee5A\u5f00\u5934\u7684\u503c\u3002<br \/>\n3. \u5c06\u7ed3\u679c\u663e\u793a\u5728\u5c4f\u5e55\u4e0a\u3002<\/p>\n<p>\u6570\u636e\u53ef\u4ee5\u4ee5\u591a\u79cd\u683c\u5f0f\u8fdb\u884c\u652f\u6301\uff0c\u5982\u6587\u672c\u3001parquet\u3001JSON\u3001Hive\u8868\u7b49\u3002<br \/>\n\u53ef\u4ee5\u4f7f\u7528JDBC\u9a71\u52a8\u7a0b\u5e8f\u4ece\u5173\u7cfb\u578b\u6570\u636e\u5e93\u4e2d\u8bfb\u53d6\u6570\u636e\u3002<br \/>\nSpark\u53ef\u4ee5\u81ea\u52a8\u5904\u7406\u538b\u7f29\u7684\u6570\u636e\u96c6\uff0c\u51e0\u4e4e\u53ef\u4ee5\u6df7\u5408\u4f7f\u7528\u4efb\u4f55\u5185\u5bb9\u3002<br \/>\n* \u6570\u636e\u7ed3\u6784\u53ef\u4ee5\u662f\u65e0\u6a21\u5f0f\u7684(tuple\u3001dict\u3001list)<\/p>\n<h4>\u5143\u6570\u636e<\/h4>\n<p>* \u30e1\u30bf\u30c7\u30fc\u30bf\u3068\u306f\u4e00\u8a00\u3067\u8a00\u3048\u3070\u3001\u300c\u5173\u4e8e\u6570\u636e\u7684\u6570\u636e\u300d\u3002<br \/>\n* \u6570\u636e\uff1a\u5206\u4e3a\u201c\u6709\u7ed3\u6784\u6570\u636e\u201d\u548c\u201c\u65e0\u7ed3\u6784\u6570\u636e\u201d\u3002<br \/>\n* \u6709\u7ed3\u6784\u6570\u636e\uff1a\u6709\u7ed3\u6784\u6570\u636e\u7684\u5185\u5bb9\u548c\u683c\u5f0f\u90fd\u662f\u4e8b\u5148\u786e\u5b9a\u7684\uff0c\u5e76\u5728\u5173\u7cfb\u6570\u636e\u5e93\u7ba1\u7406\u7cfb\u7edf\u4e2d\u5b9e\u73b0\u3002<br \/>\n* \u65e0\u7ed3\u6784\u6570\u636e\uff1a\u65e0\u7ed3\u6784\u6570\u636e\u7684\u5185\u5bb9\u548c\u683c\u5f0f\u6ca1\u6709\u660e\u786e\u7684\u89c4\u5b9a\uff0c\u53ef\u4ee5\u9002\u7528\u4e8e\u5404\u79cd\u6570\u636e\u3002\uff08\u5305\u62ec\u56fe\u50cf\u3001\u97f3\u9891\u3001\u6587\u672c\u6570\u636e\u7b49\u90fd\u5c5e\u4e8e\u65e0\u7ed3\u6784\u6570\u636e\uff09<\/p>\n<h1>\u4f7f\u7528Python\u5728Pyspark\u4e2d\u7684\u65b9\u6cd5<\/h1>\n<ul class=\"post-ul\">\u30af\u30e9\u30b9\u305f\u30e2\u30fc\u30c9\u3068\u30ed\u30fc\u30ab\u30eb\u30e2\u30fc\u30c9\u304c\u5b58\u5728\u3059\u308b\u3002<\/ul>\n<p>\u5728\u672c\u5730\u6a21\u5f0f\u4e0b\u6267\u884c<br \/>\n\u4f7f\u7528\u5e38\u89c4\u7684Python\u4ee3\u7801\u4e5f\u6ca1\u6709\u95ee\u9898\u3002<br \/>\n\u7ed3\u6784\u4e0a\u7684\u53d8\u5316\u66f4\u5bb9\u6613\u53d1\u751f\u5728\u6570\u636e\u548c\u4ee3\u7801\u5728\u72ec\u7acb\u7684\u5de5\u4f5c\u8fdb\u7a0b\u4e4b\u95f4\u88ab\u590d\u5236\u7684\u60c5\u51b5\u4e0b\u3002<\/p>\n<p>\u5728\u96c6\u7fa4\u6a21\u5f0f\u4e0b\u6267\u884c<br \/>\n\u5f53\u4f5c\u4e1a\u88ab\u63d0\u4ea4\u5e76\u6267\u884c\u65f6\uff0c\u8be5\u4f5c\u4e1a\u4f1a\u88ab\u53d1\u9001\u5230\u9a71\u52a8\u8282\u70b9\u3002<br \/>\n\u9a71\u52a8\u8282\u70b9\u4f1a\u751f\u6210\u4f5c\u4e1a\u4e2d\u7684\u6709\u5411\u65e0\u73af\u56fe(DAG)\uff0c\u5e76\u786e\u5b9a\u6bcf\u4e2a\u6267\u884c\u8282\u70b9\u3002<br \/>\n\u7136\u540e\uff0c\u9a71\u52a8\u8282\u70b9\u4f1a\u6307\u793a\u5de5\u4f5c\u8282\u70b9\u6267\u884c\u5404\u81ea\u7684\u4efb\u52a1\uff0c\u5e76\u5728\u5904\u7406\u5b8c\u6210\u540e\u5c06\u7ed3\u679c\u8fd4\u56de\u7ed9\u9a71\u52a8\u8282\u70b9\u3002<\/p>\n<h1>\u5173\u4e8e\u5173\u7cfb\u6570\u636e\u5e93\u7ba1\u7406\u7cfb\u7edf\uff08RDMS\uff09<\/h1>\n<p>\u5728\u7ba1\u7406\u548c\u5229\u7528\u6570\u636e\u7684\u7cfb\u7edf\u4e2d\uff0cMySQL\u3001Oracle\u7b49\u5173\u7cfb\u6570\u636e\u5e93\u7ba1\u7406\u7cfb\u7edf\uff08RDBMS\uff09\u662f\u4ee3\u8868\u6027\u7684\u7cfb\u7edf\u3002<br \/>\nRDBMS\u53ef\u4ee5\u5904\u7406\u590d\u6742\u7684\u6570\u636e\u5e76\u5b9e\u65f6\u5904\u7406\uff0c\u4f46\u5728\u5904\u7406\u5927\u91cf\u6570\u636e\u65f6\u4f1a\u51fa\u73b0\u6027\u80fd\u4e0b\u964d\u7684\u5f31\u70b9\u3002<\/p>\n<p>\u4e3a\u4e86\u5904\u7406\u6570\u636e\u5e93\u65e0\u6cd5\u8ddf\u4e0a\u7684\u5927\u91cf\u6570\u636e\uff0c\u5f15\u5165\u4e86\u5206\u5e03\u5f0f\u5904\u7406\u7684\u6982\u5ff5\u3002\u5c06\u6570\u636e\u5206\u5272\u6210\u591a\u4e2a\u670d\u52a1\u5668\u6216CPU\u8fdb\u884c\u5904\u7406\uff0c\u4ee5\u4fbf\u9ad8\u901f\u5904\u7406\u5927\u91cf\u6570\u636e\u3002\u53ef\u4ee5\u60f3\u8c61\u591a\u53f0\u8ba1\u7b97\u673a\u4e92\u76f8\u5206\u62c5\u5de5\u4f5c\u8fdb\u884c\u5904\u7406\uff0c\u8fd9\u6837\u66f4\u5bb9\u6613\u7406\u89e3\u3002<\/p>\n<p>\u90a3\u5c31\u662f\u4e3b\u8282\u70b9\u548c\u6838\u5fc3\u8282\u70b9\u3002<\/p>\n<p>\u5206\u6563\u5904\u7406\u88ab\u7528\u4e8e\u9700\u8981\u5927\u91cf\u6570\u636e\u5904\u7406\u7684\u5de5\u4f5c\uff0c\u5982\u6c14\u8c61\u9884\u6d4b\u3001\u707e\u5bb3\u9884\u6d4b\u3001\u57fa\u56e0\u89e3\u6790\u3001\u5b9e\u65f6\u5206\u6790\u793e\u4ea4\u7f51\u7edc\u3001\u7f51\u7ad9\u7528\u6237\u884c\u4e3a\u5206\u6790\u7b49\u3002\u5728\u5904\u7406\u5927\u6570\u636e\u65b9\u9762\uff0c\u5206\u6563\u5904\u7406\u662f\u4e0d\u53ef\u6216\u7f3a\u7684\u8981\u7d20\uff0c\u5176\u9700\u6c42\u8fd1\u5e74\u6765\u6301\u7eed\u589e\u957f\u3002<\/p>\n<p>Hadoop\u548cSpark\u662f\u53ef\u4ee5\u7528\u4e8e\u5d4c\u5165\u5206\u6563\u5904\u7406\u529f\u80fd\u7684\u6846\u67b6\u7684\u4ee3\u8868\u3002<\/p>\n<h1>Jupyter Notebook\u4e2d\u7684PySpark Docker<\/h1>\n","protected":false},"excerpt":{"rendered":"<p>\u5173\u4e8eHadoop\u548cSpark\u7684\u5206\u5e03\u5f0f\u5904\u7406 \u5de8\u5927\u30c7\u30fc\u30bf\u306e\u53d6\u308a\u6271\u3044\u3092\u76ee\u7684\u3068\u3057\u305f\u5206\u6563\u51e6\u7406\u306e\u30d5\u30ec\u30fc\u30e0\u30ef\u30fc\u30af &#038;nbsp [&hellip;]<\/p>\n","protected":false},"author":8,"featured_media":0,"comment_status":"closed","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[1],"tags":[],"class_list":["post-50456","post","type-post","status-publish","format-standard","hentry","category-uncategorized"],"yoast_head":"<!-- This site is optimized with the Yoast SEO Premium plugin v21.5 (Yoast SEO v21.5) - https:\/\/yoast.com\/wordpress\/plugins\/seo\/ -->\n<title>\u5173\u4e8eSpark\uff08Python\u3001Java\u3001JVM\u3001RDD\uff09 - Blog - Silicon Cloud<\/title>\n<meta name=\"robots\" content=\"index, follow, max-snippet:-1, max-image-preview:large, max-video-preview:-1\" \/>\n<link rel=\"canonical\" href=\"https:\/\/www.silicloud.com\/zh\/blog\/\u5173\u4e8espark\uff08python\u3001java\u3001jvm\u3001rdd\uff09\/\" \/>\n<meta property=\"og:locale\" content=\"zh_CN\" \/>\n<meta property=\"og:type\" content=\"article\" \/>\n<meta property=\"og:title\" content=\"\u5173\u4e8eSpark\uff08Python\u3001Java\u3001JVM\u3001RDD\uff09\" \/>\n<meta property=\"og:description\" content=\"\u5173\u4e8eHadoop\u548cSpark\u7684\u5206\u5e03\u5f0f\u5904\u7406 \u5de8\u5927\u30c7\u30fc\u30bf\u306e\u53d6\u308a\u6271\u3044\u3092\u76ee\u7684\u3068\u3057\u305f\u5206\u6563\u51e6\u7406\u306e\u30d5\u30ec\u30fc\u30e0\u30ef\u30fc\u30af &amp;nbsp [&hellip;]\" \/>\n<meta property=\"og:url\" content=\"https:\/\/www.silicloud.com\/zh\/blog\/\u5173\u4e8espark\uff08python\u3001java\u3001jvm\u3001rdd\uff09\/\" \/>\n<meta property=\"og:site_name\" content=\"Blog - Silicon Cloud\" \/>\n<meta property=\"article:published_time\" content=\"2023-03-05T14:27:52+00:00\" \/>\n<meta property=\"article:modified_time\" content=\"2024-05-03T20:28:46+00:00\" \/>\n<meta name=\"author\" content=\"\u96c5, \u609f\" \/>\n<meta name=\"twitter:card\" content=\"summary_large_image\" \/>\n<meta name=\"twitter:label1\" content=\"\u4f5c\u8005\" \/>\n\t<meta name=\"twitter:data1\" content=\"\u96c5, \u609f\" \/>\n\t<meta name=\"twitter:label2\" content=\"\u9884\u8ba1\u9605\u8bfb\u65f6\u95f4\" \/>\n\t<meta name=\"twitter:data2\" content=\"1 \u5206\" \/>\n<script type=\"application\/ld+json\" class=\"yoast-schema-graph\">{\"@context\":\"https:\/\/schema.org\",\"@graph\":[{\"@type\":\"WebPage\",\"@id\":\"https:\/\/www.silicloud.com\/zh\/blog\/%e5%85%b3%e4%ba%8espark%ef%bc%88python%e3%80%81java%e3%80%81jvm%e3%80%81rdd%ef%bc%89\/\",\"url\":\"https:\/\/www.silicloud.com\/zh\/blog\/%e5%85%b3%e4%ba%8espark%ef%bc%88python%e3%80%81java%e3%80%81jvm%e3%80%81rdd%ef%bc%89\/\",\"name\":\"\u5173\u4e8eSpark\uff08Python\u3001Java\u3001JVM\u3001RDD\uff09 - Blog - Silicon Cloud\",\"isPartOf\":{\"@id\":\"https:\/\/www.silicloud.com\/zh\/blog\/#website\"},\"datePublished\":\"2023-03-05T14:27:52+00:00\",\"dateModified\":\"2024-05-03T20:28:46+00:00\",\"author\":{\"@id\":\"https:\/\/www.silicloud.com\/zh\/blog\/#\/schema\/person\/f044a4b7fa4ee2701702942002419ca6\"},\"breadcrumb\":{\"@id\":\"https:\/\/www.silicloud.com\/zh\/blog\/%e5%85%b3%e4%ba%8espark%ef%bc%88python%e3%80%81java%e3%80%81jvm%e3%80%81rdd%ef%bc%89\/#breadcrumb\"},\"inLanguage\":\"zh-Hans\",\"potentialAction\":[{\"@type\":\"ReadAction\",\"target\":[\"https:\/\/www.silicloud.com\/zh\/blog\/%e5%85%b3%e4%ba%8espark%ef%bc%88python%e3%80%81java%e3%80%81jvm%e3%80%81rdd%ef%bc%89\/\"]}]},{\"@type\":\"BreadcrumbList\",\"@id\":\"https:\/\/www.silicloud.com\/zh\/blog\/%e5%85%b3%e4%ba%8espark%ef%bc%88python%e3%80%81java%e3%80%81jvm%e3%80%81rdd%ef%bc%89\/#breadcrumb\",\"itemListElement\":[{\"@type\":\"ListItem\",\"position\":1,\"name\":\"\u9996\u9875\",\"item\":\"https:\/\/www.silicloud.com\/zh\/blog\/\"},{\"@type\":\"ListItem\",\"position\":2,\"name\":\"\u5173\u4e8eSpark\uff08Python\u3001Java\u3001JVM\u3001RDD\uff09\"}]},{\"@type\":\"WebSite\",\"@id\":\"https:\/\/www.silicloud.com\/zh\/blog\/#website\",\"url\":\"https:\/\/www.silicloud.com\/zh\/blog\/\",\"name\":\"Blog - Silicon Cloud\",\"description\":\"\",\"inLanguage\":\"zh-Hans\"},{\"@type\":\"Person\",\"@id\":\"https:\/\/www.silicloud.com\/zh\/blog\/#\/schema\/person\/f044a4b7fa4ee2701702942002419ca6\",\"name\":\"\u96c5, \u609f\",\"image\":{\"@type\":\"ImageObject\",\"inLanguage\":\"zh-Hans\",\"@id\":\"https:\/\/www.silicloud.com\/zh\/blog\/#\/schema\/person\/image\/\",\"url\":\"https:\/\/secure.gravatar.com\/avatar\/e71a913e914f1aad1efc391f92084294bac54bc782acd289638580134cf667a6?s=96&d=mm&r=g\",\"contentUrl\":\"https:\/\/secure.gravatar.com\/avatar\/e71a913e914f1aad1efc391f92084294bac54bc782acd289638580134cf667a6?s=96&d=mm&r=g\",\"caption\":\"\u96c5, \u609f\"},\"url\":\"https:\/\/www.silicloud.com\/zh\/blog\/author\/yawu\/\"},{\"@type\":\"ImageObject\",\"inLanguage\":\"zh-Hans\",\"@id\":\"https:\/\/www.silicloud.com\/zh\/blog\/%e5%85%b3%e4%ba%8espark%ef%bc%88python%e3%80%81java%e3%80%81jvm%e3%80%81rdd%ef%bc%89\/#local-main-organization-logo\",\"url\":\"\",\"contentUrl\":\"\",\"caption\":\"Blog - Silicon Cloud\"}]}<\/script>\n<!-- \/ Yoast SEO Premium plugin. -->","yoast_head_json":{"title":"\u5173\u4e8eSpark\uff08Python\u3001Java\u3001JVM\u3001RDD\uff09 - Blog - Silicon Cloud","robots":{"index":"index","follow":"follow","max-snippet":"max-snippet:-1","max-image-preview":"max-image-preview:large","max-video-preview":"max-video-preview:-1"},"canonical":"https:\/\/www.silicloud.com\/zh\/blog\/\u5173\u4e8espark\uff08python\u3001java\u3001jvm\u3001rdd\uff09\/","og_locale":"zh_CN","og_type":"article","og_title":"\u5173\u4e8eSpark\uff08Python\u3001Java\u3001JVM\u3001RDD\uff09","og_description":"\u5173\u4e8eHadoop\u548cSpark\u7684\u5206\u5e03\u5f0f\u5904\u7406 \u5de8\u5927\u30c7\u30fc\u30bf\u306e\u53d6\u308a\u6271\u3044\u3092\u76ee\u7684\u3068\u3057\u305f\u5206\u6563\u51e6\u7406\u306e\u30d5\u30ec\u30fc\u30e0\u30ef\u30fc\u30af &nbsp [&hellip;]","og_url":"https:\/\/www.silicloud.com\/zh\/blog\/\u5173\u4e8espark\uff08python\u3001java\u3001jvm\u3001rdd\uff09\/","og_site_name":"Blog - Silicon Cloud","article_published_time":"2023-03-05T14:27:52+00:00","article_modified_time":"2024-05-03T20:28:46+00:00","author":"\u96c5, \u609f","twitter_card":"summary_large_image","twitter_misc":{"\u4f5c\u8005":"\u96c5, \u609f","\u9884\u8ba1\u9605\u8bfb\u65f6\u95f4":"1 \u5206"},"schema":{"@context":"https:\/\/schema.org","@graph":[{"@type":"WebPage","@id":"https:\/\/www.silicloud.com\/zh\/blog\/%e5%85%b3%e4%ba%8espark%ef%bc%88python%e3%80%81java%e3%80%81jvm%e3%80%81rdd%ef%bc%89\/","url":"https:\/\/www.silicloud.com\/zh\/blog\/%e5%85%b3%e4%ba%8espark%ef%bc%88python%e3%80%81java%e3%80%81jvm%e3%80%81rdd%ef%bc%89\/","name":"\u5173\u4e8eSpark\uff08Python\u3001Java\u3001JVM\u3001RDD\uff09 - Blog - Silicon Cloud","isPartOf":{"@id":"https:\/\/www.silicloud.com\/zh\/blog\/#website"},"datePublished":"2023-03-05T14:27:52+00:00","dateModified":"2024-05-03T20:28:46+00:00","author":{"@id":"https:\/\/www.silicloud.com\/zh\/blog\/#\/schema\/person\/f044a4b7fa4ee2701702942002419ca6"},"breadcrumb":{"@id":"https:\/\/www.silicloud.com\/zh\/blog\/%e5%85%b3%e4%ba%8espark%ef%bc%88python%e3%80%81java%e3%80%81jvm%e3%80%81rdd%ef%bc%89\/#breadcrumb"},"inLanguage":"zh-Hans","potentialAction":[{"@type":"ReadAction","target":["https:\/\/www.silicloud.com\/zh\/blog\/%e5%85%b3%e4%ba%8espark%ef%bc%88python%e3%80%81java%e3%80%81jvm%e3%80%81rdd%ef%bc%89\/"]}]},{"@type":"BreadcrumbList","@id":"https:\/\/www.silicloud.com\/zh\/blog\/%e5%85%b3%e4%ba%8espark%ef%bc%88python%e3%80%81java%e3%80%81jvm%e3%80%81rdd%ef%bc%89\/#breadcrumb","itemListElement":[{"@type":"ListItem","position":1,"name":"\u9996\u9875","item":"https:\/\/www.silicloud.com\/zh\/blog\/"},{"@type":"ListItem","position":2,"name":"\u5173\u4e8eSpark\uff08Python\u3001Java\u3001JVM\u3001RDD\uff09"}]},{"@type":"WebSite","@id":"https:\/\/www.silicloud.com\/zh\/blog\/#website","url":"https:\/\/www.silicloud.com\/zh\/blog\/","name":"Blog - Silicon Cloud","description":"","inLanguage":"zh-Hans"},{"@type":"Person","@id":"https:\/\/www.silicloud.com\/zh\/blog\/#\/schema\/person\/f044a4b7fa4ee2701702942002419ca6","name":"\u96c5, \u609f","image":{"@type":"ImageObject","inLanguage":"zh-Hans","@id":"https:\/\/www.silicloud.com\/zh\/blog\/#\/schema\/person\/image\/","url":"https:\/\/secure.gravatar.com\/avatar\/e71a913e914f1aad1efc391f92084294bac54bc782acd289638580134cf667a6?s=96&d=mm&r=g","contentUrl":"https:\/\/secure.gravatar.com\/avatar\/e71a913e914f1aad1efc391f92084294bac54bc782acd289638580134cf667a6?s=96&d=mm&r=g","caption":"\u96c5, \u609f"},"url":"https:\/\/www.silicloud.com\/zh\/blog\/author\/yawu\/"},{"@type":"ImageObject","inLanguage":"zh-Hans","@id":"https:\/\/www.silicloud.com\/zh\/blog\/%e5%85%b3%e4%ba%8espark%ef%bc%88python%e3%80%81java%e3%80%81jvm%e3%80%81rdd%ef%bc%89\/#local-main-organization-logo","url":"","contentUrl":"","caption":"Blog - Silicon Cloud"}]}},"_links":{"self":[{"href":"https:\/\/www.silicloud.com\/zh\/blog\/wp-json\/wp\/v2\/posts\/50456","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/www.silicloud.com\/zh\/blog\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/www.silicloud.com\/zh\/blog\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/www.silicloud.com\/zh\/blog\/wp-json\/wp\/v2\/users\/8"}],"replies":[{"embeddable":true,"href":"https:\/\/www.silicloud.com\/zh\/blog\/wp-json\/wp\/v2\/comments?post=50456"}],"version-history":[{"count":2,"href":"https:\/\/www.silicloud.com\/zh\/blog\/wp-json\/wp\/v2\/posts\/50456\/revisions"}],"predecessor-version":[{"id":96117,"href":"https:\/\/www.silicloud.com\/zh\/blog\/wp-json\/wp\/v2\/posts\/50456\/revisions\/96117"}],"wp:attachment":[{"href":"https:\/\/www.silicloud.com\/zh\/blog\/wp-json\/wp\/v2\/media?parent=50456"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/www.silicloud.com\/zh\/blog\/wp-json\/wp\/v2\/categories?post=50456"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/www.silicloud.com\/zh\/blog\/wp-json\/wp\/v2\/tags?post=50456"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}