{"id":36404,"date":"2022-11-11T09:27:05","date_gmt":"2022-11-23T10:30:37","guid":{"rendered":"https:\/\/www.silicloud.com\/zh\/blog\/%e5%b0%9d%e8%af%95%e4%bd%bf%e7%94%a8apache-spark\/"},"modified":"2024-04-29T00:07:39","modified_gmt":"2024-04-28T16:07:39","slug":"%e5%b0%9d%e8%af%95%e4%bd%bf%e7%94%a8apache-spark","status":"publish","type":"post","link":"https:\/\/www.silicloud.com\/zh\/blog\/%e5%b0%9d%e8%af%95%e4%bd%bf%e7%94%a8apache-spark\/","title":{"rendered":"\u5c1d\u8bd5\u4f7f\u7528Apache Spark"},"content":{"rendered":"<h1>Apache Spark \u5c31\u662f&#8230;<\/h1>\n<ul class=\"post-ul\">\n<li style=\"list-style-type: none;\">\n<ul class=\"post-ul\">\u5206\u6563\u51e6\u7406\u3092\u884c\u3046\u30d5\u30ec\u30fc\u30e0\u30ef\u30fc\u30af<\/ul>\n<\/li>\n<\/ul>\n<p>&nbsp;<\/p>\n<ul class=\"post-ul\">\n<li style=\"list-style-type: none;\">\n<ul class=\"post-ul\">\u8907\u6570\u53f0\u3067\u30af\u30e9\u30b9\u30bf\u69cb\u6210\u3092\u53d6\u308a\u5404\u30ef\u30fc\u30ab\u30fc\u3067\u51e6\u7406\u3092\u624b\u5206\u3051\u3057\u3066\u884c\u3046<\/ul>\n<\/li>\n<\/ul>\n<p>&nbsp;<\/p>\n<ul class=\"post-ul\">Hadoop \u306e MapReduce \u5c64\u3068\u540c\u3058\u3088\u3046\u306a\u5f79\u5272\u3092\u6301\u3061\u3001\u30d5\u30a1\u30a4\u30eb\u30b7\u30b9\u30c6\u30e0\u30d9\u30fc\u30b9\u3067\u30c7\u30fc\u30bf\u3092\u6271\u3046 Hadoop \u306b\u5bfe\u3057\u3066\u30e1\u30e2\u30ea\u30d9\u30fc\u30b9\u3067\u30c7\u30fc\u30bf\u3092\u6271\u3046\u305f\u3081\u9ad8\u901f<\/ul>\n<h1>\u5728 Kubernetes \u73af\u5883\u4e2d\u542f\u52a8 Spark\u3002<\/h1>\n<p>https:\/\/github.com\/kubernetes\/examples\/blob\/master\/staging\/spark\/README.md \u306b\u30b5\u30f3\u30d7\u30eb\u304c\u3042\u308b<\/p>\n<h2>\u521b\u5efaDocker\u955c\u50cf<\/h2>\n<ul class=\"post-ul\">\n<li style=\"list-style-type: none;\">\n<ul 
class=\"post-ul\">\u30b5\u30f3\u30d7\u30eb\u306e\u30a4\u30e1\u30fc\u30b8\u306f\u30d0\u30fc\u30b8\u30e7\u30f3\u304c\u53e4\u3044\u306e\u3067\u4f5c\u308a\u76f4\u3057\u3066\u307f\u308b<\/ul>\n<\/li>\n<\/ul>\n<p>&nbsp;<\/p>\n<ul class=\"post-ul\">\u3064\u3044\u3067\u306b Dockerfile \u5185\u3067\u30a2\u30e1\u30ea\u30ab\u306e\u30b5\u30fc\u30d0\u304b\u3089\u76f4\u63a5\u30bd\u30fc\u30b9\u3092\u30c0\u30a6\u30f3\u30ed\u30fc\u30c9\u3057\u3066\u3044\u308b\u306e\u3067\u4e8b\u524d\u306b\u30c0\u30a6\u30f3\u30ed\u30fc\u30c9\u3057\u3066\u30b3\u30d4\u30fc\u3059\u308b\u3088\u3046\u306b\u5909\u66f4<\/ul>\n<pre class=\"post-pre\"><code>FROM java:openjdk-8-jdk\r\n\r\nENV hadoop_ver 2.7.4\r\nENV spark_ver 2.2.0\r\n\r\n# download from http:\/\/ftp.kddilabs.jp\/infosystems\/apache\/hadoop\/common\/hadoop-${hadoop_ver}\/hadoop-${hadoop_ver}-src.tar.gz\r\nCOPY hadoop.tgz \/tmp\/\r\n# download from https:\/\/d3kbcqa49mib13.cloudfront.net\/spark-${spark_ver}-bin-hadoop2.7.tgz\r\nCOPY spark.tgz \/tmp\/\r\n\r\nRUN mkdir -p \/opt &amp;&amp; \\\r\n    cd \/tmp &amp;&amp; \\\r\n    tar -zxf hadoop.tgz &amp;&amp; \\\r\n    mkdir -p \/opt\/hadoop\/lib\/ &amp;&amp; \\\r\n    mv hadoop-${hadoop_ver}-src \/opt\/hadoop\/lib\/native &amp;&amp; \\\r\n    echo Hadoop ${hadoop_ver} native libraries installed in \/opt\/hadoop\/lib\/native\r\n\r\nRUN mkdir -p \/opt &amp;&amp; \\\r\n    cd \/tmp &amp;&amp; \\\r\n    tar -zxf spark.tgz &amp;&amp; \\\r\n    mv spark-2.2.0-bin-hadoop2.7 \/opt\/spark &amp;&amp; \\\r\n    echo Spark ${spark_ver} installed in \/opt\r\n\r\n# Add the GCS connector.\r\nRUN cd \/opt\/spark\/jars &amp;&amp; \\\r\n    curl -O https:\/\/storage.googleapis.com\/hadoop-lib\/gcs\/gcs-connector-latest-hadoop2.jar\r\n\r\n# if numpy is installed on a driver it needs to be installed on all\r\n# workers, so install it everywhere\r\nRUN apt-get update &amp;&amp; \\\r\n    apt-get install -y python-numpy netcat &amp;&amp; \\\r\n    apt-get clean &amp;&amp; \\\r\n    rm -rf 
\/var\/lib\/apt\/lists\/*\r\n\r\nADD log4j.properties \/opt\/spark\/conf\/log4j.properties\r\nADD start-common.sh start-worker start-master \/\r\nADD core-site.xml \/opt\/spark\/conf\/core-site.xml\r\nADD spark-defaults.conf \/opt\/spark\/conf\/spark-defaults.conf\r\nENV PATH $PATH:\/opt\/spark\/bin\r\n<\/code><\/pre>\n<h2>\u542f\u52a8\u4e3b\u5bb9\u5668<\/h2>\n<ul class=\"post-ul\">\n<li style=\"list-style-type: none;\">\n<ul class=\"post-ul\">\u3053\u306e\u30b5\u30f3\u30d7\u30eb\u3067\u306f\u30af\u30e9\u30b9\u30bf\u30ea\u30f3\u30b0\u306b Standalone mode \u3068\u3044\u3046 Spark \u7d44\u307f\u8fbc\u307f\u306e\u30af\u30e9\u30b9\u30bf\u30de\u30cd\u30fc\u30b8\u30e3\u3092\u4f7f\u3044\u30011 \u53f0\u306e\u30de\u30b9\u30bf\u30fc\u3068\u8907\u6570\u53f0\u306e\u30ef\u30fc\u30ab\u30fc\u3068\u3044\u3046\u69cb\u6210\u3092\u53d6\u308b<\/ul>\n<\/li>\n<\/ul>\n<p>&nbsp;<\/p>\n<ul class=\"post-ul\">\u30de\u30b9\u30bf\u30fc\u306e\u30b3\u30f3\u30c6\u30ca\u3092\u7acb\u3061\u4e0a\u3052\u308b Kubernetes \u306e\u5b9a\u7fa9\u30d5\u30a1\u30a4\u30eb\u304c ReplicationController \u3067\u66f8\u304b\u308c\u3066\u3044\u305f\u306e\u3067\u5f8c\u7d99\u306e Deployment \u306b\u5909\u66f4\u3057\u3066\u7acb\u3061\u4e0a\u3052\u308b<\/ul>\n<h3>\u5b9a\u4e49\u6587\u4ef6<\/h3>\n<pre class=\"post-pre\"><code><span class=\"na\">apiVersion<\/span><span class=\"pi\">:<\/span> <span class=\"s\">extensions\/v1beta1<\/span>\r\n<span class=\"na\">kind<\/span><span class=\"pi\">:<\/span> <span class=\"s\">Deployment<\/span>\r\n<span class=\"na\">metadata<\/span><span class=\"pi\">:<\/span>\r\n  <span class=\"na\">name<\/span><span class=\"pi\">:<\/span> <span class=\"s\">spark-master-controller<\/span>\r\n<span class=\"na\">spec<\/span><span class=\"pi\">:<\/span>\r\n  <span class=\"na\">replicas<\/span><span class=\"pi\">:<\/span> <span class=\"m\">1<\/span>\r\n  <span class=\"na\">template<\/span><span class=\"pi\">:<\/span>\r\n    <span class=\"na\">metadata<\/span><span class=\"pi\">:<\/span>\r\n      <span 
class=\"na\">labels<\/span><span class=\"pi\">:<\/span>\r\n        <span class=\"na\">component<\/span><span class=\"pi\">:<\/span> <span class=\"s\">spark-master<\/span>\r\n    <span class=\"na\">spec<\/span><span class=\"pi\">:<\/span>\r\n      <span class=\"na\">containers<\/span><span class=\"pi\">:<\/span>\r\n        <span class=\"pi\">-<\/span> <span class=\"na\">name<\/span><span class=\"pi\">:<\/span> <span class=\"s\">spark-master<\/span>\r\n          <span class=\"na\">image<\/span><span class=\"pi\">:<\/span> <span class=\"s\">bl\/spark:v1.0<\/span>\r\n          <span class=\"na\">command<\/span><span class=\"pi\">:<\/span> <span class=\"pi\">[<\/span><span class=\"s2\">\"<\/span><span class=\"s\">\/start-master\"<\/span><span class=\"pi\">]<\/span>\r\n          <span class=\"na\">ports<\/span><span class=\"pi\">:<\/span>\r\n            <span class=\"pi\">-<\/span> <span class=\"na\">containerPort<\/span><span class=\"pi\">:<\/span> <span class=\"m\">7077<\/span>\r\n            <span class=\"pi\">-<\/span> <span class=\"na\">containerPort<\/span><span class=\"pi\">:<\/span> <span class=\"m\">8080<\/span>\r\n          <span class=\"na\">resources<\/span><span class=\"pi\">:<\/span>\r\n            <span class=\"na\">requests<\/span><span class=\"pi\">:<\/span>\r\n              <span class=\"na\">cpu<\/span><span class=\"pi\">:<\/span> <span class=\"s\">100m<\/span>\r\n<\/code><\/pre>\n<h3>\u6307\u4ee4<\/h3>\n<pre class=\"post-pre\"><code># \u30de\u30b9\u30bf\u30fc\u30b3\u30f3\u30c6\u30ca\u306e\u7acb\u3061\u4e0a\u3052\r\nkubectl apply -f spark-master-controller.yml\r\n\r\n# \u30de\u30b9\u30bf\u30fc\u30b5\u30fc\u30d3\u30b9\u306e\u7acb\u3061\u4e0a\u3052. 
\u3053\u3061\u3089\u306f\u30b5\u30f3\u30d7\u30eb\u305d\u306e\u307e\u307e\u4f7f\u7528\r\nkubectl apply -f spark-master-service.yml\r\n<\/code><\/pre>\n<h2>\u542f\u52a8\u5de5\u4f5c\u5bb9\u5668<\/h2>\n<ul class=\"post-ul\">\u30ef\u30fc\u30ab\u30fc\u30b3\u30f3\u30c6\u30ca\u3082\u540c\u69d8\u306b Deployment \u3067\u5b9a\u7fa9\u3057\u76f4\u3057\u3066\u7acb\u3061\u4e0a\u3052\u308b<\/ul>\n<pre class=\"post-pre\"><code><span class=\"na\">apiVersion<\/span><span class=\"pi\">:<\/span> <span class=\"s\">extensions\/v1beta1<\/span>\r\n<span class=\"na\">kind<\/span><span class=\"pi\">:<\/span> <span class=\"s\">Deployment<\/span>\r\n<span class=\"na\">metadata<\/span><span class=\"pi\">:<\/span>\r\n  <span class=\"na\">name<\/span><span class=\"pi\">:<\/span> <span class=\"s\">spark-worker-controller<\/span>\r\n<span class=\"na\">spec<\/span><span class=\"pi\">:<\/span>\r\n  <span class=\"na\">replicas<\/span><span class=\"pi\">:<\/span> <span class=\"m\">3<\/span>\r\n  <span class=\"na\">template<\/span><span class=\"pi\">:<\/span>\r\n    <span class=\"na\">metadata<\/span><span class=\"pi\">:<\/span>\r\n      <span class=\"na\">labels<\/span><span class=\"pi\">:<\/span>\r\n        <span class=\"na\">component<\/span><span class=\"pi\">:<\/span> <span class=\"s\">spark-worker<\/span>\r\n    <span class=\"na\">spec<\/span><span class=\"pi\">:<\/span>\r\n      <span class=\"na\">containers<\/span><span class=\"pi\">:<\/span>\r\n        <span class=\"pi\">-<\/span> <span class=\"na\">name<\/span><span class=\"pi\">:<\/span> <span class=\"s\">spark-worker<\/span>\r\n          <span class=\"na\">image<\/span><span class=\"pi\">:<\/span> <span class=\"s\">bl\/spark:v1.0<\/span>\r\n          <span class=\"na\">command<\/span><span class=\"pi\">:<\/span> <span class=\"pi\">[<\/span><span class=\"s2\">\"<\/span><span class=\"s\">\/start-worker\"<\/span><span class=\"pi\">]<\/span>\r\n          <span class=\"na\">ports<\/span><span class=\"pi\">:<\/span>\r\n            <span 
class=\"pi\">-<\/span> <span class=\"na\">containerPort<\/span><span class=\"pi\">:<\/span> <span class=\"m\">8081<\/span>\r\n          <span class=\"na\">resources<\/span><span class=\"pi\">:<\/span>\r\n            <span class=\"na\">requests<\/span><span class=\"pi\">:<\/span>\r\n              <span class=\"na\">cpu<\/span><span class=\"pi\">:<\/span> <span class=\"s\">100m<\/span>\r\n<\/code><\/pre>\n<pre class=\"post-pre\"><code>kubectl apply -f spark-worker-controller.yml\r\n<\/code><\/pre>\n<h1>\u8bd5\u7740\u8fd0\u884cSpark<\/h1>\n<ul class=\"post-ul\">\n<li style=\"list-style-type: none;\">\n<ul class=\"post-ul\">\u4eca\u56de\u4f5c\u3063\u305f\u30a4\u30e1\u30fc\u30b8\u3060\u3068 \/opt\/spark\/bin \u306b\u3042\u308b\u30b9\u30af\u30ea\u30d7\u30c8\u3092\u4f7f\u3063\u3066 Spark \u306b\u51e6\u7406\u3092\u5b9f\u884c\u3055\u305b\u308b\u3053\u3068\u304c\u3067\u304d\u308b<\/ul>\n<\/li>\n<\/ul>\n<p>&nbsp;<\/p>\n<ul class=\"post-ul\">\u4efb\u610f\u306e\u30b3\u30f3\u30c6\u30ca\u306b\u30ed\u30b0\u30a4\u30f3\u3057\u3066\u30b9\u30af\u30ea\u30d7\u30c8\u3092\u5b9f\u884c\u3057\u3066\u307f\u308b<\/ul>\n<pre class=\"post-pre\"><code># \u5b9f\u884c\u4e2d\u306e Pod \u3092\u78ba\u8a8d\r\nkubectl get pods\r\n\r\n# \u30ed\u30b0\u30a4\u30f3\r\nkubectl exec -it spark-master-controller-xxxxxxxxxx \/bin\/sh\r\n\r\n# Scala \u3068 Java \u306e\u30b5\u30f3\u30d7\u30eb\u30b9\u30af\u30ea\u30d7\u30c8\u5b9f\u884c\u306b\u306f run-example \u3092\u4f7f\u3046\r\n\/opt\/spark\/bin\/run-example SparkPi\r\n\r\n# Python \u306e\u30b5\u30f3\u30d7\u30eb\u30b9\u30af\u30ea\u30d7\u30c8\u5b9f\u884c\u306b\u306f spark-submit \u3092\u4f7f\u3046\r\n\/opt\/spark\/bin\/spark-submit examples\/src\/main\/python\/pi.py\r\n\r\n# R \u306e\u30b5\u30f3\u30d7\u30eb\u30b9\u30af\u30ea\u30d7\u30c8\u5b9f\u884c\u306b\u306f spark-submit \u3092\u4f7f\u3046\r\n\/opt\/spark\/bin\/spark-submit 
examples\/src\/main\/r\/dataframe.R\r\n<\/code><\/pre>\n<h1>\u521b\u4f5c\u53ef\u4ee5\u5728Spark\u4e0a\u8fd0\u884c\u7684\u811a\u672c\u3002<\/h1>\n<ul class=\"post-ul\">\n<li style=\"list-style-type: none;\">\n<ul class=\"post-ul\">\u30b5\u30f3\u30d7\u30eb\u306f \/opt\/spark\/examples \u306b\u3042\u308b\u306e\u3067\u3053\u308c\u3092\u81ea\u5206\u3067\u30b3\u30f3\u30d1\u30a4\u30eb\u3057\u3066\u5b9f\u884c\u3057\u3066\u307f\u308b<\/ul>\n<\/li>\n<\/ul>\n<p>&nbsp;<\/p>\n<ul class=\"post-ul\">\u4eca\u56de\u306f Java \u3067 Spark Streaming \u3092\u4f7f\u3063\u3066 Kafka \u304b\u3089\u30c7\u30fc\u30bf\u3092\u8aad\u307f\u8fbc\u3093\u3067 Word Count \u3059\u308b\u30b9\u30af\u30ea\u30d7\u30c8\u3092\u4f5c\u3063\u3066\u307f\u308b<\/ul>\n<h2>\u4ee3\u7801<\/h2>\n<pre class=\"post-pre\"><code><span class=\"cm\">\/*\r\n * Licensed to the Apache Software Foundation (ASF) under one or more\r\n * contributor license agreements.  See the NOTICE file distributed with\r\n * this work for additional information regarding copyright ownership.\r\n * The ASF licenses this file to You under the Apache License, Version 2.0\r\n * (the \"License\"); you may not use this file except in compliance with\r\n * the License.  
You may obtain a copy of the License at\r\n *\r\n *    http:\/\/www.apache.org\/licenses\/LICENSE-2.0\r\n *\r\n * Unless required by applicable law or agreed to in writing, software\r\n * distributed under the License is distributed on an \"AS IS\" BASIS,\r\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\r\n * See the License for the specific language governing permissions and\r\n * limitations under the License.\r\n *\/<\/span>\r\n\r\n<span class=\"kn\">package<\/span> <span class=\"nn\">com.example.spark<\/span><span class=\"o\">;<\/span>\r\n\r\n<span class=\"kn\">import<\/span> <span class=\"nn\">java.util.HashMap<\/span><span class=\"o\">;<\/span>\r\n<span class=\"kn\">import<\/span> <span class=\"nn\">java.util.HashSet<\/span><span class=\"o\">;<\/span>\r\n<span class=\"kn\">import<\/span> <span class=\"nn\">java.util.Arrays<\/span><span class=\"o\">;<\/span>\r\n<span class=\"kn\">import<\/span> <span class=\"nn\">java.util.Map<\/span><span class=\"o\">;<\/span>\r\n<span class=\"kn\">import<\/span> <span class=\"nn\">java.util.Set<\/span><span class=\"o\">;<\/span>\r\n<span class=\"kn\">import<\/span> <span class=\"nn\">java.util.regex.Pattern<\/span><span class=\"o\">;<\/span>\r\n\r\n<span class=\"kn\">import<\/span> <span class=\"nn\">scala.Tuple2<\/span><span class=\"o\">;<\/span>\r\n\r\n<span class=\"kn\">import<\/span> <span class=\"nn\">org.apache.kafka.clients.consumer.ConsumerRecord<\/span><span class=\"o\">;<\/span>\r\n\r\n<span class=\"kn\">import<\/span> <span class=\"nn\">org.apache.spark.SparkConf<\/span><span class=\"o\">;<\/span>\r\n<span class=\"kn\">import<\/span> <span class=\"nn\">org.apache.spark.streaming.api.java.*<\/span><span class=\"o\">;<\/span>\r\n<span class=\"kn\">import<\/span> <span class=\"nn\">org.apache.spark.streaming.kafka010.ConsumerStrategies<\/span><span class=\"o\">;<\/span>\r\n<span class=\"kn\">import<\/span> <span class=\"nn\">org.apache.spark.streaming.kafka010.KafkaUtils<\/span><span 
class=\"o\">;<\/span>\r\n<span class=\"kn\">import<\/span> <span class=\"nn\">org.apache.spark.streaming.kafka010.LocationStrategies<\/span><span class=\"o\">;<\/span>\r\n<span class=\"kn\">import<\/span> <span class=\"nn\">org.apache.spark.streaming.Durations<\/span><span class=\"o\">;<\/span>\r\n\r\n<span class=\"kn\">import<\/span> <span class=\"nn\">org.apache.log4j.*<\/span><span class=\"o\">;<\/span>\r\n\r\n<span class=\"cm\">\/**\r\n * Consumes messages from one or more topics in Kafka and does wordcount.\r\n * Usage: JavaDirectKafkaWordCount &lt;brokers&gt; &lt;topics&gt;\r\n *   &lt;brokers&gt; is a list of one or more Kafka brokers\r\n *   &lt;topics&gt; is a list of one or more kafka topics to consume from\r\n *\r\n * Example:\r\n *    $ bin\/run-example streaming.JavaDirectKafkaWordCount broker1-host:port,broker2-host:port \\\r\n *      topic1,topic2\r\n *\/<\/span>\r\n\r\n<span class=\"kd\">public<\/span> <span class=\"kd\">final<\/span> <span class=\"kd\">class<\/span> <span class=\"nc\">KafkaWordCount<\/span> <span class=\"o\">{<\/span>\r\n    <span class=\"kd\">private<\/span> <span class=\"kd\">static<\/span> <span class=\"kd\">final<\/span> <span class=\"nc\">Pattern<\/span> <span class=\"no\">SPACE<\/span> <span class=\"o\">=<\/span> <span class=\"nc\">Pattern<\/span><span class=\"o\">.<\/span><span class=\"na\">compile<\/span><span class=\"o\">(<\/span><span class=\"s\">\" \"<\/span><span class=\"o\">);<\/span>\r\n\r\n    <span class=\"kd\">public<\/span> <span class=\"kd\">static<\/span> <span class=\"kt\">void<\/span> <span class=\"nf\">main<\/span><span class=\"o\">(<\/span><span class=\"nc\">String<\/span><span class=\"o\">[]<\/span> <span class=\"n\">args<\/span><span class=\"o\">)<\/span> <span class=\"kd\">throws<\/span> <span class=\"nc\">Exception<\/span> <span class=\"o\">{<\/span>\r\n        <span class=\"k\">if<\/span> <span class=\"o\">(<\/span><span class=\"n\">args<\/span><span class=\"o\">.<\/span><span 
class=\"na\">length<\/span> <span class=\"o\">&lt;<\/span> <span class=\"mi\">2<\/span><span class=\"o\">)<\/span> <span class=\"o\">{<\/span>\r\n            <span class=\"nc\">System<\/span><span class=\"o\">.<\/span><span class=\"na\">err<\/span><span class=\"o\">.<\/span><span class=\"na\">println<\/span><span class=\"o\">(<\/span><span class=\"s\">\"Usage: JavaDirectKafkaWordCount &lt;brokers&gt; &lt;topics&gt;\\n\"<\/span> <span class=\"o\">+<\/span>\r\n                    <span class=\"s\">\"  &lt;brokers&gt; is a list of one or more Kafka brokers\\n\"<\/span> <span class=\"o\">+<\/span>\r\n                    <span class=\"s\">\"  &lt;topics&gt; is a list of one or more kafka topics to consume from\\n\\n\"<\/span><span class=\"o\">);<\/span>\r\n            <span class=\"nc\">System<\/span><span class=\"o\">.<\/span><span class=\"na\">exit<\/span><span class=\"o\">(<\/span><span class=\"mi\">1<\/span><span class=\"o\">);<\/span>\r\n        <span class=\"o\">}<\/span>\r\n\r\n        <span class=\"c1\">\/\/StreamingExamples.setStreamingLogLevels();<\/span>\r\n        <span class=\"nc\">Logger<\/span><span class=\"o\">.<\/span><span class=\"na\">getRootLogger<\/span><span class=\"o\">().<\/span><span class=\"na\">setLevel<\/span><span class=\"o\">(<\/span><span class=\"nc\">Level<\/span><span class=\"o\">.<\/span><span class=\"na\">WARN<\/span><span class=\"o\">);<\/span>\r\n\r\n        <span class=\"nc\">String<\/span> <span class=\"n\">brokers<\/span> <span class=\"o\">=<\/span> <span class=\"n\">args<\/span><span class=\"o\">[<\/span><span class=\"mi\">0<\/span><span class=\"o\">];<\/span>\r\n        <span class=\"nc\">String<\/span> <span class=\"n\">topics<\/span> <span class=\"o\">=<\/span> <span class=\"n\">args<\/span><span class=\"o\">[<\/span><span class=\"mi\">1<\/span><span class=\"o\">];<\/span>\r\n\r\n        <span class=\"c1\">\/\/ Create context with a 2 seconds batch interval<\/span>\r\n        <span class=\"nc\">SparkConf<\/span> <span 
class=\"n\">sparkConf<\/span> <span class=\"o\">=<\/span> <span class=\"k\">new<\/span> <span class=\"nc\">SparkConf<\/span><span class=\"o\">().<\/span><span class=\"na\">setAppName<\/span><span class=\"o\">(<\/span><span class=\"s\">\"JavaDirectKafkaWordCount\"<\/span><span class=\"o\">);<\/span>\r\n        <span class=\"nc\">JavaStreamingContext<\/span> <span class=\"n\">jssc<\/span> <span class=\"o\">=<\/span> <span class=\"k\">new<\/span> <span class=\"nc\">JavaStreamingContext<\/span><span class=\"o\">(<\/span><span class=\"n\">sparkConf<\/span><span class=\"o\">,<\/span> <span class=\"nc\">Durations<\/span><span class=\"o\">.<\/span><span class=\"na\">seconds<\/span><span class=\"o\">(<\/span><span class=\"mi\">2<\/span><span class=\"o\">));<\/span>\r\n\r\n        <span class=\"nc\">Set<\/span><span class=\"o\">&lt;<\/span><span class=\"nc\">String<\/span><span class=\"o\">&gt;<\/span> <span class=\"n\">topicsSet<\/span> <span class=\"o\">=<\/span> <span class=\"k\">new<\/span> <span class=\"nc\">HashSet<\/span><span class=\"o\">&lt;&gt;(<\/span><span class=\"nc\">Arrays<\/span><span class=\"o\">.<\/span><span class=\"na\">asList<\/span><span class=\"o\">(<\/span><span class=\"n\">topics<\/span><span class=\"o\">.<\/span><span class=\"na\">split<\/span><span class=\"o\">(<\/span><span class=\"s\">\",\"<\/span><span class=\"o\">)));<\/span>\r\n        <span class=\"nc\">Map<\/span><span class=\"o\">&lt;<\/span><span class=\"nc\">String<\/span><span class=\"o\">,<\/span> <span class=\"nc\">Object<\/span><span class=\"o\">&gt;<\/span> <span class=\"n\">kafkaParams<\/span> <span class=\"o\">=<\/span> <span class=\"k\">new<\/span> <span class=\"nc\">HashMap<\/span><span class=\"o\">&lt;&gt;();<\/span>\r\n        <span class=\"n\">kafkaParams<\/span><span class=\"o\">.<\/span><span class=\"na\">put<\/span><span class=\"o\">(<\/span><span class=\"s\">\"bootstrap.servers\"<\/span><span class=\"o\">,<\/span> <span class=\"n\">brokers<\/span><span 
class=\"o\">);<\/span>\r\n        <span class=\"n\">kafkaParams<\/span><span class=\"o\">.<\/span><span class=\"na\">put<\/span><span class=\"o\">(<\/span><span class=\"s\">\"group.id\"<\/span><span class=\"o\">,<\/span> <span class=\"s\">\"my-consumer-group\"<\/span><span class=\"o\">);<\/span>\r\n        <span class=\"n\">kafkaParams<\/span><span class=\"o\">.<\/span><span class=\"na\">put<\/span><span class=\"o\">(<\/span><span class=\"s\">\"value.deserializer\"<\/span><span class=\"o\">,<\/span><span class=\"s\">\"org.apache.kafka.common.serialization.StringDeserializer\"<\/span><span class=\"o\">);<\/span>\r\n        <span class=\"n\">kafkaParams<\/span><span class=\"o\">.<\/span><span class=\"na\">put<\/span><span class=\"o\">(<\/span><span class=\"s\">\"key.deserializer\"<\/span><span class=\"o\">,<\/span><span class=\"s\">\"org.apache.kafka.common.serialization.StringDeserializer\"<\/span><span class=\"o\">);<\/span>\r\n\r\n        <span class=\"c1\">\/\/ Create direct kafka stream with brokers and topics<\/span>\r\n        <span class=\"nc\">JavaInputDStream<\/span><span class=\"o\">&lt;<\/span><span class=\"nc\">ConsumerRecord<\/span><span class=\"o\">&lt;<\/span><span class=\"nc\">String<\/span><span class=\"o\">,<\/span> <span class=\"nc\">String<\/span><span class=\"o\">&gt;&gt;<\/span> <span class=\"n\">messages<\/span> <span class=\"o\">=<\/span> <span class=\"nc\">KafkaUtils<\/span><span class=\"o\">.<\/span><span class=\"na\">createDirectStream<\/span><span class=\"o\">(<\/span>\r\n                <span class=\"n\">jssc<\/span><span class=\"o\">,<\/span>\r\n                <span class=\"nc\">LocationStrategies<\/span><span class=\"o\">.<\/span><span class=\"na\">PreferConsistent<\/span><span class=\"o\">(),<\/span>\r\n                <span class=\"nc\">ConsumerStrategies<\/span><span class=\"o\">.<\/span><span class=\"na\">Subscribe<\/span><span class=\"o\">(<\/span><span class=\"n\">topicsSet<\/span><span class=\"o\">,<\/span> <span 
class=\"n\">kafkaParams<\/span><span class=\"o\">));<\/span>\r\n\r\n        <span class=\"c1\">\/\/ Get the lines, split them into words, count the words and print<\/span>\r\n        <span class=\"nc\">JavaDStream<\/span><span class=\"o\">&lt;<\/span><span class=\"nc\">String<\/span><span class=\"o\">&gt;<\/span> <span class=\"n\">lines<\/span> <span class=\"o\">=<\/span> <span class=\"n\">messages<\/span><span class=\"o\">.<\/span><span class=\"na\">map<\/span><span class=\"o\">(<\/span><span class=\"nl\">ConsumerRecord:<\/span><span class=\"o\">:<\/span><span class=\"n\">value<\/span><span class=\"o\">);<\/span>\r\n        <span class=\"nc\">JavaDStream<\/span><span class=\"o\">&lt;<\/span><span class=\"nc\">String<\/span><span class=\"o\">&gt;<\/span> <span class=\"n\">words<\/span> <span class=\"o\">=<\/span> <span class=\"n\">lines<\/span><span class=\"o\">.<\/span><span class=\"na\">flatMap<\/span><span class=\"o\">(<\/span><span class=\"n\">x<\/span> <span class=\"o\">-&gt;<\/span> <span class=\"nc\">Arrays<\/span><span class=\"o\">.<\/span><span class=\"na\">asList<\/span><span class=\"o\">(<\/span><span class=\"no\">SPACE<\/span><span class=\"o\">.<\/span><span class=\"na\">split<\/span><span class=\"o\">(<\/span><span class=\"n\">x<\/span><span class=\"o\">)).<\/span><span class=\"na\">iterator<\/span><span class=\"o\">());<\/span>\r\n        <span class=\"nc\">JavaPairDStream<\/span><span class=\"o\">&lt;<\/span><span class=\"nc\">String<\/span><span class=\"o\">,<\/span> <span class=\"nc\">Integer<\/span><span class=\"o\">&gt;<\/span> <span class=\"n\">wordCounts<\/span> <span class=\"o\">=<\/span> <span class=\"n\">words<\/span><span class=\"o\">.<\/span><span class=\"na\">mapToPair<\/span><span class=\"o\">(<\/span><span class=\"n\">s<\/span> <span class=\"o\">-&gt;<\/span> <span class=\"k\">new<\/span> <span class=\"nc\">Tuple2<\/span><span class=\"o\">&lt;&gt;(<\/span><span class=\"n\">s<\/span><span class=\"o\">,<\/span> <span 
class=\"mi\">1<\/span><span class=\"o\">))<\/span>\r\n                <span class=\"o\">.<\/span><span class=\"na\">reduceByKey<\/span><span class=\"o\">((<\/span><span class=\"n\">i1<\/span><span class=\"o\">,<\/span> <span class=\"n\">i2<\/span><span class=\"o\">)<\/span> <span class=\"o\">-&gt;<\/span> <span class=\"n\">i1<\/span> <span class=\"o\">+<\/span> <span class=\"n\">i2<\/span><span class=\"o\">);<\/span>\r\n        <span class=\"n\">wordCounts<\/span><span class=\"o\">.<\/span><span class=\"na\">print<\/span><span class=\"o\">();<\/span>\r\n\r\n        <span class=\"c1\">\/\/ Start the computation<\/span>\r\n        <span class=\"n\">jssc<\/span><span class=\"o\">.<\/span><span class=\"na\">start<\/span><span class=\"o\">();<\/span>\r\n        <span class=\"n\">jssc<\/span><span class=\"o\">.<\/span><span class=\"na\">awaitTermination<\/span><span class=\"o\">();<\/span>\r\n    <span class=\"o\">}<\/span>\r\n<span class=\"o\">}<\/span>\r\n<\/code><\/pre>\n<h2>pom.xml<\/h2>\n<ul class=\"post-ul\">\n<li style=\"list-style-type: none;\">\n<ul class=\"post-ul\">maven-assembly-plugin \u3092\u4f7f\u3063\u3066\u4f9d\u5b58\u3057\u3066\u3044\u308b\u30d1\u30c3\u30b1\u30fc\u30b8\u3082\u542b\u3093\u3060 JAR \u30d5\u30a1\u30a4\u30eb\u3092\u4f5c\u6210\u3059\u308b<\/ul>\n<\/li>\n<\/ul>\n<p>&nbsp;<\/p>\n<ul class=\"post-ul\">spark-core_2.11 \u306a\u3069\u306e\u30d1\u30c3\u30b1\u30fc\u30b8\u306f Spark \u5074\u3067\u7528\u610f\u3059\u308b\u306e\u3067\u542b\u3081\u308b\u5fc5\u8981\u306f\u306a\u3044<\/ul>\n<pre class=\"post-pre\"><code><span class=\"cp\">&lt;?xml version=\"1.0\" encoding=\"UTF-8\"?&gt;<\/span>\r\n<span class=\"nt\">&lt;project<\/span> <span class=\"na\">xmlns=<\/span><span class=\"s\">\"http:\/\/maven.apache.org\/POM\/4.0.0\"<\/span>\r\n         <span class=\"na\">xmlns:xsi=<\/span><span 
class=\"s\">\"http:\/\/www.w3.org\/2001\/XMLSchema-instance\"<\/span>\r\n         <span class=\"na\">xsi:schemaLocation=<\/span><span class=\"s\">\"http:\/\/maven.apache.org\/POM\/4.0.0 http:\/\/maven.apache.org\/xsd\/maven-4.0.0.xsd\"<\/span><span class=\"nt\">&gt;<\/span>\r\n    <span class=\"nt\">&lt;modelVersion&gt;<\/span>4.0.0<span class=\"nt\">&lt;\/modelVersion&gt;<\/span>\r\n\r\n    <span class=\"nt\">&lt;groupId&gt;<\/span>com.example.spark<span class=\"nt\">&lt;\/groupId&gt;<\/span>\r\n    <span class=\"nt\">&lt;artifactId&gt;<\/span>spark-app<span class=\"nt\">&lt;\/artifactId&gt;<\/span>\r\n    <span class=\"nt\">&lt;version&gt;<\/span>1.0-SNAPSHOT<span class=\"nt\">&lt;\/version&gt;<\/span>\r\n\r\n    <span class=\"nt\">&lt;properties&gt;<\/span>\r\n        <span class=\"nt\">&lt;project.build.sourceEncoding&gt;<\/span>UTF-8<span class=\"nt\">&lt;\/project.build.sourceEncoding&gt;<\/span>\r\n        <span class=\"nt\">&lt;java.version&gt;<\/span>1.8<span class=\"nt\">&lt;\/java.version&gt;<\/span>\r\n        <span class=\"nt\">&lt;maven.compiler.source&gt;<\/span>1.8<span class=\"nt\">&lt;\/maven.compiler.source&gt;<\/span>\r\n        <span class=\"nt\">&lt;maven.compiler.target&gt;<\/span>1.8<span class=\"nt\">&lt;\/maven.compiler.target&gt;<\/span>\r\n    <span class=\"nt\">&lt;\/properties&gt;<\/span>\r\n\r\n    <span class=\"nt\">&lt;dependencies&gt;<\/span>\r\n        <span class=\"nt\">&lt;dependency&gt;<\/span>\r\n            <span class=\"nt\">&lt;groupId&gt;<\/span>junit<span class=\"nt\">&lt;\/groupId&gt;<\/span>\r\n            <span class=\"nt\">&lt;artifactId&gt;<\/span>junit<span class=\"nt\">&lt;\/artifactId&gt;<\/span>\r\n            <span class=\"nt\">&lt;version&gt;<\/span>3.8.1<span class=\"nt\">&lt;\/version&gt;<\/span>\r\n            <span class=\"nt\">&lt;scope&gt;<\/span>test<span class=\"nt\">&lt;\/scope&gt;<\/span>\r\n        <span class=\"nt\">&lt;\/dependency&gt;<\/span>\r\n        <span 
class=\"nt\">&lt;dependency&gt;<\/span> <span class=\"c\">&lt;!-- Spark dependency --&gt;<\/span>\r\n            <span class=\"nt\">&lt;groupId&gt;<\/span>org.apache.spark<span class=\"nt\">&lt;\/groupId&gt;<\/span>\r\n            <span class=\"nt\">&lt;artifactId&gt;<\/span>spark-core_2.11<span class=\"nt\">&lt;\/artifactId&gt;<\/span>\r\n            <span class=\"nt\">&lt;version&gt;<\/span>2.2.0<span class=\"nt\">&lt;\/version&gt;<\/span>\r\n            <span class=\"nt\">&lt;scope&gt;<\/span>provided<span class=\"nt\">&lt;\/scope&gt;<\/span>\r\n        <span class=\"nt\">&lt;\/dependency&gt;<\/span>\r\n        <span class=\"nt\">&lt;dependency&gt;<\/span> <span class=\"c\">&lt;!-- Spark dependency --&gt;<\/span>\r\n            <span class=\"nt\">&lt;groupId&gt;<\/span>org.apache.spark<span class=\"nt\">&lt;\/groupId&gt;<\/span>\r\n            <span class=\"nt\">&lt;artifactId&gt;<\/span>spark-sql_2.11<span class=\"nt\">&lt;\/artifactId&gt;<\/span>\r\n            <span class=\"nt\">&lt;version&gt;<\/span>2.2.0<span class=\"nt\">&lt;\/version&gt;<\/span>\r\n            <span class=\"nt\">&lt;scope&gt;<\/span>provided<span class=\"nt\">&lt;\/scope&gt;<\/span>\r\n        <span class=\"nt\">&lt;\/dependency&gt;<\/span>\r\n        <span class=\"nt\">&lt;dependency&gt;<\/span> <span class=\"c\">&lt;!-- Spark dependency --&gt;<\/span>\r\n            <span class=\"nt\">&lt;groupId&gt;<\/span>org.apache.spark<span class=\"nt\">&lt;\/groupId&gt;<\/span>\r\n            <span class=\"nt\">&lt;artifactId&gt;<\/span>spark-streaming_2.11<span class=\"nt\">&lt;\/artifactId&gt;<\/span>\r\n            <span class=\"nt\">&lt;version&gt;<\/span>2.2.0<span class=\"nt\">&lt;\/version&gt;<\/span>\r\n        <span class=\"nt\">&lt;\/dependency&gt;<\/span>\r\n        <span class=\"nt\">&lt;dependency&gt;<\/span> <span class=\"c\">&lt;!-- Spark dependency --&gt;<\/span>\r\n            <span class=\"nt\">&lt;groupId&gt;<\/span>org.apache.spark<span 
class=\"nt\">&lt;\/groupId&gt;<\/span>\r\n            <span class=\"nt\">&lt;artifactId&gt;<\/span>spark-streaming-kafka-0-10_2.11<span class=\"nt\">&lt;\/artifactId&gt;<\/span>\r\n            <span class=\"nt\">&lt;version&gt;<\/span>2.1.0<span class=\"nt\">&lt;\/version&gt;<\/span>\r\n        <span class=\"nt\">&lt;\/dependency&gt;<\/span>\r\n    <span class=\"nt\">&lt;\/dependencies&gt;<\/span>\r\n\r\n    <span class=\"nt\">&lt;build&gt;<\/span>\r\n        <span class=\"nt\">&lt;plugins&gt;<\/span>\r\n            <span class=\"nt\">&lt;plugin&gt;<\/span>\r\n                <span class=\"nt\">&lt;artifactId&gt;<\/span>maven-assembly-plugin<span class=\"nt\">&lt;\/artifactId&gt;<\/span>\r\n                <span class=\"nt\">&lt;version&gt;<\/span>3.0.0<span class=\"nt\">&lt;\/version&gt;<\/span>\r\n                <span class=\"nt\">&lt;configuration&gt;<\/span>\r\n                    <span class=\"nt\">&lt;descriptorRefs&gt;<\/span>\r\n                        <span class=\"nt\">&lt;descriptorRef&gt;<\/span>jar-with-dependencies<span class=\"nt\">&lt;\/descriptorRef&gt;<\/span>\r\n                    <span class=\"nt\">&lt;\/descriptorRefs&gt;<\/span>\r\n                    <span class=\"nt\">&lt;archive&gt;<\/span>\r\n                        <span class=\"nt\">&lt;manifest&gt;<\/span>\r\n                            <span class=\"nt\">&lt;mainClass&gt;<\/span>com.example.spark.KafkaWordCount<span class=\"nt\">&lt;\/mainClass&gt;<\/span>\r\n                        <span class=\"nt\">&lt;\/manifest&gt;<\/span>\r\n                    <span class=\"nt\">&lt;\/archive&gt;<\/span>\r\n                <span class=\"nt\">&lt;\/configuration&gt;<\/span>\r\n                <span class=\"nt\">&lt;executions&gt;<\/span>\r\n                    <span class=\"nt\">&lt;execution&gt;<\/span>\r\n                        <span class=\"nt\">&lt;id&gt;<\/span>make-assembly<span class=\"nt\">&lt;\/id&gt;<\/span> <span class=\"c\">&lt;!-- this is used for inheritance merges 
--&gt;<\/span>\r\n                        <span class=\"nt\">&lt;phase&gt;<\/span>package<span class=\"nt\">&lt;\/phase&gt;<\/span> <span class=\"c\">&lt;!-- bind to the packaging phase --&gt;<\/span>\r\n                        <span class=\"nt\">&lt;goals&gt;<\/span>\r\n                            <span class=\"nt\">&lt;goal&gt;<\/span>single<span class=\"nt\">&lt;\/goal&gt;<\/span>\r\n                        <span class=\"nt\">&lt;\/goals&gt;<\/span>\r\n                    <span class=\"nt\">&lt;\/execution&gt;<\/span>\r\n                <span class=\"nt\">&lt;\/executions&gt;<\/span>\r\n            <span class=\"nt\">&lt;\/plugin&gt;<\/span>\r\n        <span class=\"nt\">&lt;\/plugins&gt;<\/span>\r\n    <span class=\"nt\">&lt;\/build&gt;<\/span>\r\n<span class=\"nt\">&lt;\/project&gt;<\/span>\r\n<\/code><\/pre>\n<h2>\u6267\u884c<\/h2>\n<ul class=\"post-ul\">\u4f5c\u6210\u3057\u305f JAR \u30d5\u30a1\u30a4\u30eb\u3092\u30b3\u30f3\u30c6\u30ca\u306b\u30b3\u30d4\u30fc\u3057\u3066\u30ad\u30c3\u30af\u30b9\u30af\u30ea\u30d7\u30c8\u3067\u5b9f\u884c\u3059\u308b<\/ul>\n<pre class=\"post-pre\"><code># \u30d5\u30a1\u30a4\u30eb\u306e\u30b3\u30d4\u30fc\r\nkubectl cp spark-app-1.0-SNAPSHOT-jar-with-dependencies.jar spark-master-controller-xxxxxxxxxx:\/opt\/spark\/jars\/\r\n\r\n# \u30ed\u30b0\u30a4\u30f3\r\nkubectl exec -it spark-master-controller-xxxxxxxxxx -- \/bin\/sh\r\n\r\n# \u5b9f\u884c\r\n\/opt\/spark\/bin\/spark-submit \\\r\n  --class com.example.spark.KafkaWordCount \\\r\n  --master spark:\/\/spark-master:7077 \\\r\n  \/opt\/spark\/jars\/spark-app-1.0-SNAPSHOT-jar-with-dependencies.jar \\\r\n  kafka-0:9092 topic-0\r\n<\/code><\/pre>\n","protected":false},"excerpt":{"rendered":"<p>Apache Spark \u5c31\u662f&#8230; \u5206\u6563\u51e6\u7406\u3092\u884c\u3046\u30d5\u30ec\u30fc\u30e0\u30ef\u30fc\u30af &nbsp; \u8907\u6570\u53f0\u3067\u30af\u30e9\u30b9\u30bf\u69cb\u6210
[&hellip;]<\/p>\n","protected":false},"author":10,"featured_media":0,"comment_status":"closed","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[1],"tags":[],"class_list":["post-36404","post","type-post","status-publish","format-standard","hentry","category-uncategorized"],"yoast_head":"<!-- This site is optimized with the Yoast SEO Premium plugin v21.5 (Yoast SEO v21.5) - https:\/\/yoast.com\/wordpress\/plugins\/seo\/ -->\n<title>\u5c1d\u8bd5\u4f7f\u7528Apache Spark - Blog - Silicon Cloud<\/title>\n<meta name=\"robots\" content=\"index, follow, max-snippet:-1, max-image-preview:large, max-video-preview:-1\" \/>\n<link rel=\"canonical\" href=\"https:\/\/www.silicloud.com\/zh\/blog\/\u5c1d\u8bd5\u4f7f\u7528apache-spark\/\" \/>\n<meta property=\"og:locale\" content=\"zh_CN\" \/>\n<meta property=\"og:type\" content=\"article\" \/>\n<meta property=\"og:title\" content=\"\u5c1d\u8bd5\u4f7f\u7528Apache Spark\" \/>\n<meta property=\"og:description\" content=\"Apache Spark \u5c31\u662f&#8230; \u5206\u6563\u51e6\u7406\u3092\u884c\u3046\u30d5\u30ec\u30fc\u30e0\u30ef\u30fc\u30af &nbsp; \u8907\u6570\u53f0\u3067\u30af\u30e9\u30b9\u30bf\u69cb\u6210 [&hellip;]\" \/>\n<meta property=\"og:url\" content=\"https:\/\/www.silicloud.com\/zh\/blog\/\u5c1d\u8bd5\u4f7f\u7528apache-spark\/\" \/>\n<meta property=\"og:site_name\" content=\"Blog - Silicon Cloud\" \/>\n<meta property=\"article:published_time\" content=\"2022-11-23T10:30:37+00:00\" \/>\n<meta property=\"article:modified_time\" content=\"2024-04-28T16:07:39+00:00\" \/>\n<meta name=\"author\" content=\"\u5b87, \u534e\" \/>\n<meta name=\"twitter:card\" content=\"summary_large_image\" \/>\n<meta name=\"twitter:label1\" content=\"\u4f5c\u8005\" \/>\n\t<meta name=\"twitter:data1\" content=\"\u5b87, \u534e\" \/>\n\t<meta name=\"twitter:label2\" content=\"\u9884\u8ba1\u9605\u8bfb\u65f6\u95f4\" \/>\n\t<meta name=\"twitter:data2\" content=\"7 \u5206\" \/>\n<script 
type=\"application\/ld+json\" class=\"yoast-schema-graph\">{\"@context\":\"https:\/\/schema.org\",\"@graph\":[{\"@type\":\"WebPage\",\"@id\":\"https:\/\/www.silicloud.com\/zh\/blog\/%e5%b0%9d%e8%af%95%e4%bd%bf%e7%94%a8apache-spark\/\",\"url\":\"https:\/\/www.silicloud.com\/zh\/blog\/%e5%b0%9d%e8%af%95%e4%bd%bf%e7%94%a8apache-spark\/\",\"name\":\"\u5c1d\u8bd5\u4f7f\u7528Apache Spark - Blog - Silicon Cloud\",\"isPartOf\":{\"@id\":\"https:\/\/www.silicloud.com\/zh\/blog\/#website\"},\"datePublished\":\"2022-11-23T10:30:37+00:00\",\"dateModified\":\"2024-04-28T16:07:39+00:00\",\"author\":{\"@id\":\"https:\/\/www.silicloud.com\/zh\/blog\/#\/schema\/person\/513018e4e121d3add1b7c5de8be21458\"},\"breadcrumb\":{\"@id\":\"https:\/\/www.silicloud.com\/zh\/blog\/%e5%b0%9d%e8%af%95%e4%bd%bf%e7%94%a8apache-spark\/#breadcrumb\"},\"inLanguage\":\"zh-Hans\",\"potentialAction\":[{\"@type\":\"ReadAction\",\"target\":[\"https:\/\/www.silicloud.com\/zh\/blog\/%e5%b0%9d%e8%af%95%e4%bd%bf%e7%94%a8apache-spark\/\"]}]},{\"@type\":\"BreadcrumbList\",\"@id\":\"https:\/\/www.silicloud.com\/zh\/blog\/%e5%b0%9d%e8%af%95%e4%bd%bf%e7%94%a8apache-spark\/#breadcrumb\",\"itemListElement\":[{\"@type\":\"ListItem\",\"position\":1,\"name\":\"\u9996\u9875\",\"item\":\"https:\/\/www.silicloud.com\/zh\/blog\/\"},{\"@type\":\"ListItem\",\"position\":2,\"name\":\"\u5c1d\u8bd5\u4f7f\u7528Apache Spark\"}]},{\"@type\":\"WebSite\",\"@id\":\"https:\/\/www.silicloud.com\/zh\/blog\/#website\",\"url\":\"https:\/\/www.silicloud.com\/zh\/blog\/\",\"name\":\"Blog - Silicon Cloud\",\"description\":\"\",\"inLanguage\":\"zh-Hans\"},{\"@type\":\"Person\",\"@id\":\"https:\/\/www.silicloud.com\/zh\/blog\/#\/schema\/person\/513018e4e121d3add1b7c5de8be21458\",\"name\":\"\u5b87, 
\u534e\",\"image\":{\"@type\":\"ImageObject\",\"inLanguage\":\"zh-Hans\",\"@id\":\"https:\/\/www.silicloud.com\/zh\/blog\/#\/schema\/person\/image\/\",\"url\":\"https:\/\/secure.gravatar.com\/avatar\/63cd45cbc05a35fc4ff7637a163c83c4962ef58d27472726c3a3e0c9c5194f0f?s=96&d=mm&r=g\",\"contentUrl\":\"https:\/\/secure.gravatar.com\/avatar\/63cd45cbc05a35fc4ff7637a163c83c4962ef58d27472726c3a3e0c9c5194f0f?s=96&d=mm&r=g\",\"caption\":\"\u5b87, \u534e\"},\"url\":\"https:\/\/www.silicloud.com\/zh\/blog\/author\/yuhua\/\"},{\"@type\":\"ImageObject\",\"inLanguage\":\"zh-Hans\",\"@id\":\"https:\/\/www.silicloud.com\/zh\/blog\/%e5%b0%9d%e8%af%95%e4%bd%bf%e7%94%a8apache-spark\/#local-main-organization-logo\",\"url\":\"\",\"contentUrl\":\"\",\"caption\":\"Blog - Silicon Cloud\"}]}<\/script>\n<!-- \/ Yoast SEO Premium plugin. -->","yoast_head_json":{"title":"\u5c1d\u8bd5\u4f7f\u7528Apache Spark - Blog - Silicon Cloud","robots":{"index":"index","follow":"follow","max-snippet":"max-snippet:-1","max-image-preview":"max-image-preview:large","max-video-preview":"max-video-preview:-1"},"canonical":"https:\/\/www.silicloud.com\/zh\/blog\/\u5c1d\u8bd5\u4f7f\u7528apache-spark\/","og_locale":"zh_CN","og_type":"article","og_title":"\u5c1d\u8bd5\u4f7f\u7528Apache Spark","og_description":"Apache Spark \u5c31\u662f&#8230; \u5206\u6563\u51e6\u7406\u3092\u884c\u3046\u30d5\u30ec\u30fc\u30e0\u30ef\u30fc\u30af &nbsp; \u8907\u6570\u53f0\u3067\u30af\u30e9\u30b9\u30bf\u69cb\u6210 [&hellip;]","og_url":"https:\/\/www.silicloud.com\/zh\/blog\/\u5c1d\u8bd5\u4f7f\u7528apache-spark\/","og_site_name":"Blog - Silicon Cloud","article_published_time":"2022-11-23T10:30:37+00:00","article_modified_time":"2024-04-28T16:07:39+00:00","author":"\u5b87, \u534e","twitter_card":"summary_large_image","twitter_misc":{"\u4f5c\u8005":"\u5b87, \u534e","\u9884\u8ba1\u9605\u8bfb\u65f6\u95f4":"7 
\u5206"},"schema":{"@context":"https:\/\/schema.org","@graph":[{"@type":"WebPage","@id":"https:\/\/www.silicloud.com\/zh\/blog\/%e5%b0%9d%e8%af%95%e4%bd%bf%e7%94%a8apache-spark\/","url":"https:\/\/www.silicloud.com\/zh\/blog\/%e5%b0%9d%e8%af%95%e4%bd%bf%e7%94%a8apache-spark\/","name":"\u5c1d\u8bd5\u4f7f\u7528Apache Spark - Blog - Silicon Cloud","isPartOf":{"@id":"https:\/\/www.silicloud.com\/zh\/blog\/#website"},"datePublished":"2022-11-23T10:30:37+00:00","dateModified":"2024-04-28T16:07:39+00:00","author":{"@id":"https:\/\/www.silicloud.com\/zh\/blog\/#\/schema\/person\/513018e4e121d3add1b7c5de8be21458"},"breadcrumb":{"@id":"https:\/\/www.silicloud.com\/zh\/blog\/%e5%b0%9d%e8%af%95%e4%bd%bf%e7%94%a8apache-spark\/#breadcrumb"},"inLanguage":"zh-Hans","potentialAction":[{"@type":"ReadAction","target":["https:\/\/www.silicloud.com\/zh\/blog\/%e5%b0%9d%e8%af%95%e4%bd%bf%e7%94%a8apache-spark\/"]}]},{"@type":"BreadcrumbList","@id":"https:\/\/www.silicloud.com\/zh\/blog\/%e5%b0%9d%e8%af%95%e4%bd%bf%e7%94%a8apache-spark\/#breadcrumb","itemListElement":[{"@type":"ListItem","position":1,"name":"\u9996\u9875","item":"https:\/\/www.silicloud.com\/zh\/blog\/"},{"@type":"ListItem","position":2,"name":"\u5c1d\u8bd5\u4f7f\u7528Apache Spark"}]},{"@type":"WebSite","@id":"https:\/\/www.silicloud.com\/zh\/blog\/#website","url":"https:\/\/www.silicloud.com\/zh\/blog\/","name":"Blog - Silicon Cloud","description":"","inLanguage":"zh-Hans"},{"@type":"Person","@id":"https:\/\/www.silicloud.com\/zh\/blog\/#\/schema\/person\/513018e4e121d3add1b7c5de8be21458","name":"\u5b87, \u534e","image":{"@type":"ImageObject","inLanguage":"zh-Hans","@id":"https:\/\/www.silicloud.com\/zh\/blog\/#\/schema\/person\/image\/","url":"https:\/\/secure.gravatar.com\/avatar\/63cd45cbc05a35fc4ff7637a163c83c4962ef58d27472726c3a3e0c9c5194f0f?s=96&d=mm&r=g","contentUrl":"https:\/\/secure.gravatar.com\/avatar\/63cd45cbc05a35fc4ff7637a163c83c4962ef58d27472726c3a3e0c9c5194f0f?s=96&d=mm&r=g","caption":"\u5b87, 
\u534e"},"url":"https:\/\/www.silicloud.com\/zh\/blog\/author\/yuhua\/"},{"@type":"ImageObject","inLanguage":"zh-Hans","@id":"https:\/\/www.silicloud.com\/zh\/blog\/%e5%b0%9d%e8%af%95%e4%bd%bf%e7%94%a8apache-spark\/#local-main-organization-logo","url":"","contentUrl":"","caption":"Blog - Silicon Cloud"}]}},"_links":{"self":[{"href":"https:\/\/www.silicloud.com\/zh\/blog\/wp-json\/wp\/v2\/posts\/36404","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/www.silicloud.com\/zh\/blog\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/www.silicloud.com\/zh\/blog\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/www.silicloud.com\/zh\/blog\/wp-json\/wp\/v2\/users\/10"}],"replies":[{"embeddable":true,"href":"https:\/\/www.silicloud.com\/zh\/blog\/wp-json\/wp\/v2\/comments?post=36404"}],"version-history":[{"count":2,"href":"https:\/\/www.silicloud.com\/zh\/blog\/wp-json\/wp\/v2\/posts\/36404\/revisions"}],"predecessor-version":[{"id":67163,"href":"https:\/\/www.silicloud.com\/zh\/blog\/wp-json\/wp\/v2\/posts\/36404\/revisions\/67163"}],"wp:attachment":[{"href":"https:\/\/www.silicloud.com\/zh\/blog\/wp-json\/wp\/v2\/media?parent=36404"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/www.silicloud.com\/zh\/blog\/wp-json\/wp\/v2\/categories?post=36404"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/www.silicloud.com\/zh\/blog\/wp-json\/wp\/v2\/tags?post=36404"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}