{"id":47297,"date":"2023-10-12T06:30:11","date_gmt":"2022-11-14T02:26:09","guid":{"rendered":"https:\/\/www.silicloud.com\/zh\/blog\/%e4%bd%bf%e7%94%a8python%e8%bf%9e%e6%8e%a5sensortag%e3%80%81kafka%e5%92%8cspark-streaming%e8%bf%9b%e8%a1%8c%e6%b5%81%e5%a4%84%e7%90%86-%e7%ac%ac5%e9%83%a8%e5%88%86%ef%bc%9a%e9%80%9a%e8%bf%87apache-t\/"},"modified":"2024-05-04T03:48:28","modified_gmt":"2024-05-03T19:48:28","slug":"%e4%bd%bf%e7%94%a8python%e8%bf%9e%e6%8e%a5sensortag%e3%80%81kafka%e5%92%8cspark-streaming%e8%bf%9b%e8%a1%8c%e6%b5%81%e5%a4%84%e7%90%86-%e7%ac%ac5%e9%83%a8%e5%88%86%ef%bc%9a%e9%80%9a%e8%bf%87apache-t","status":"publish","type":"post","link":"https:\/\/www.silicloud.com\/zh\/blog\/%e4%bd%bf%e7%94%a8python%e8%bf%9e%e6%8e%a5sensortag%e3%80%81kafka%e5%92%8cspark-streaming%e8%bf%9b%e8%a1%8c%e6%b5%81%e5%a4%84%e7%90%86-%e7%ac%ac5%e9%83%a8%e5%88%86%ef%bc%9a%e9%80%9a%e8%bf%87apache-t\/","title":{"rendered":"\u4f7f\u7528Python\u8fde\u63a5SensorTag\u3001Kafka\u548cSpark Streaming\u8fdb\u884c\u6d41\u5904\u7406 &#8211; \u7b2c5\u90e8\u5206\uff1a\u901a\u8fc7Apache Toree\u5728Jupyter\u4e0a\u8fde\u63a5Spark"},"content":{"rendered":"<p>\u6211\u51c6\u5907\u914d\u7f6e\u4e00\u4e2aSpark\u96c6\u7fa4\uff0c\u5e76\u7f16\u5199\u4e00\u4e9b\u793a\u4f8b\u4ee3\u7801\u3002\u6211\u8ba4\u4e3a\u5f88\u591a\u4eba\u90fd\u5c06Jupyter\u4f5c\u4e3aPython\u6570\u636e\u5206\u6790\u548c\u673a\u5668\u5b66\u4e60\u7684\u6267\u884c\u73af\u5883\u3002\u4f7f\u7528Apache Toree\uff0c\u6211\u4eec\u53ef\u4ee5\u5728Jupyter\u4e2d\u4ee5\u4ea4\u4e92\u7684\u65b9\u5f0f\u7f16\u5199Spark\u5e94\u7528\u7a0b\u5e8f\u3002\u60a8\u751a\u81f3\u53ef\u4ee5\u4f7f\u7528Scala\u7684REPL\uff08\u4ea4\u4e92\u5f0f\u7f16\u7a0b\u73af\u5883\uff09\u76f4\u63a5\u5728\u6d4f\u89c8\u5668\u4e0a\u8fd0\u884c\uff0c\u5e76\u5728Jupyter\u4e2d\u4f7f\u7528\u5b83\u3002<\/p>\n<h2>Spark<\/h2>\n<p>\u5728Docker Compose\u4e2d\u5efa\u7acbSpark\u96c6\u7fa4\u3002Docker 
Hub\u548cGitHub\u4e0a\u5df2\u7ecf\u53d1\u5e03\u4e86\u8bb8\u591a\u7528\u4e8eSpark\u72ec\u7acb\u96c6\u7fa4\u7684\u955c\u50cf\u548cdocker-compose.yml\u6587\u4ef6\u3002<\/p>\n<ul class=\"post-ul\">\n<li style=\"list-style-type: none;\">\n<ul class=\"post-ul\">semantive\/spark<\/ul>\n<\/li>\n<\/ul>\n<p>&nbsp;<\/p>\n<ul class=\"post-ul\">\n<li style=\"list-style-type: none;\">\n<ul class=\"post-ul\">produktion\/jupyter-pyspark<\/ul>\n<\/li>\n<\/ul>\n<p>&nbsp;<\/p>\n<ul class=\"post-ul\">gettyimages\/docker-spark<\/ul>\n<p>\u6211\u5c1d\u8bd5\u4e86\u51e0\u4e2a\u9009\u9879\uff0c\u4f46\u662fsemantive\/spark\u7ed9\u6211\u7559\u4e0b\u4e86\u7b80\u5355\u6613\u7528\u7684\u5370\u8c61\u3002<\/p>\n<h3>Docker Compose\uff1a<\/h3>\n<p>\u5173\u4e8esemantive\/spark\u955c\u50cf\u7684\u4f7f\u7528\u65b9\u6cd5\uff0c\u8bf7\u67e5\u9605Docker Images For Apache Spark\u3002Docker Hub\u5728\u8fd9\u91cc\uff0cGitHub\u5728\u8fd9\u91cc\u3002<\/p>\n<p>\u4ece\u5b58\u50a8\u5e93\u4e2d\u7684docker-compose.yml\u6587\u4ef6\u4e2d\u8fdb\u884c\u4e86\u4e00\u4e9b\u66f4\u6539\u3002\u4e3b\u8981\u66f4\u6539\u5305\u62ec\u660e\u786e\u6307\u5b9a\u955c\u50cf\u6807\u7b7e\u4ee5\u4f7fSpark\u7248\u672c\u5339\u914d\uff0c\u5e76\u6307\u5b9aSPARK_PUBLIC_DNS\u548cSPARK_MASTER_HOST\u73af\u5883\u53d8\u91cf\u4e3a\u4e91\u4e0a\u865a\u62df\u673a\u7684\u516c\u5171IP\u5730\u5740\u3002<\/p>\n<pre class=\"post-pre\"><code><span class=\"na\">version<\/span><span class=\"pi\">:<\/span> <span class=\"s1\">'<\/span><span class=\"s\">2'<\/span>\r\n<span class=\"na\">services<\/span><span class=\"pi\">:<\/span>\r\n  <span class=\"na\">master<\/span><span class=\"pi\">:<\/span>\r\n    <span class=\"na\">image<\/span><span class=\"pi\">:<\/span> <span class=\"s\">semantive\/spark:spark-2.1.1-hadoop-2.7.3<\/span>\r\n    <span class=\"na\">command<\/span><span class=\"pi\">:<\/span> <span class=\"s\">bin\/spark-class org.apache.spark.deploy.master.Master -h master<\/span>\r\n    <span class=\"na\">hostname<\/span><span class=\"pi\">:<\/span> 
<span class=\"s\">master<\/span>\r\n    <span class=\"na\">environment<\/span><span class=\"pi\">:<\/span>\r\n      <span class=\"na\">MASTER<\/span><span class=\"pi\">:<\/span> <span class=\"s\">spark:\/\/master:7077<\/span>\r\n      <span class=\"na\">SPARK_CONF_DIR<\/span><span class=\"pi\">:<\/span> <span class=\"s\">\/conf<\/span>\r\n      <span class=\"na\">SPARK_PUBLIC_DNS<\/span><span class=\"pi\">:<\/span> <span class=\"s\">&lt;\u4eee\u60f3\u30de\u30b7\u30f3\u306e\u30d1\u30d6\u30ea\u30c3\u30afIP\u30a2\u30c9\u30ec\u30b9&gt;<\/span>\r\n      <span class=\"na\">SPARK_MASTER_HOST<\/span><span class=\"pi\">:<\/span> <span class=\"s\">&lt;\u4eee\u60f3\u30de\u30b7\u30f3\u306e\u30d1\u30d6\u30ea\u30c3\u30afIP\u30a2\u30c9\u30ec\u30b9&gt;<\/span>\r\n    <span class=\"na\">ports<\/span><span class=\"pi\">:<\/span>\r\n      <span class=\"pi\">-<\/span> <span class=\"s\">4040:4040<\/span>\r\n      <span class=\"pi\">-<\/span> <span class=\"s\">6066:6066<\/span>\r\n      <span class=\"pi\">-<\/span> <span class=\"s\">7077:7077<\/span>\r\n      <span class=\"pi\">-<\/span> <span class=\"s\">8080:8080<\/span>\r\n    <span class=\"na\">volumes<\/span><span class=\"pi\">:<\/span>\r\n      <span class=\"pi\">-<\/span> <span class=\"s\">spark_data:\/tmp\/data<\/span>\r\n\r\n  <span class=\"na\">worker1<\/span><span class=\"pi\">:<\/span>\r\n    <span class=\"na\">image<\/span><span class=\"pi\">:<\/span> <span class=\"s\">semantive\/spark:spark-2.1.1-hadoop-2.7.3<\/span>\r\n    <span class=\"na\">command<\/span><span class=\"pi\">:<\/span> <span class=\"s\">bin\/spark-class org.apache.spark.deploy.worker.Worker spark:\/\/master:7077<\/span>\r\n    <span class=\"na\">hostname<\/span><span class=\"pi\">:<\/span> <span class=\"s\">worker1<\/span>\r\n    <span class=\"na\">environment<\/span><span class=\"pi\">:<\/span>\r\n      <span class=\"na\">SPARK_CONF_DIR<\/span><span class=\"pi\">:<\/span> <span class=\"s\">\/conf<\/span>\r\n      <span 
class=\"na\">SPARK_WORKER_CORES<\/span><span class=\"pi\">:<\/span> <span class=\"m\">4<\/span>\r\n      <span class=\"na\">SPARK_WORKER_MEMORY<\/span><span class=\"pi\">:<\/span> <span class=\"s\">2g<\/span>\r\n      <span class=\"na\">SPARK_WORKER_PORT<\/span><span class=\"pi\">:<\/span> <span class=\"m\">8881<\/span>\r\n      <span class=\"na\">SPARK_WORKER_WEBUI_PORT<\/span><span class=\"pi\">:<\/span> <span class=\"m\">8081<\/span>\r\n      <span class=\"na\">SPARK_PUBLIC_DNS<\/span><span class=\"pi\">:<\/span> <span class=\"s\">&lt;\u4eee\u60f3\u30de\u30b7\u30f3\u306e\u30d1\u30d6\u30ea\u30c3\u30afIP\u30a2\u30c9\u30ec\u30b9&gt;<\/span>\r\n    <span class=\"na\">depends_on<\/span><span class=\"pi\">:<\/span>\r\n      <span class=\"pi\">-<\/span> <span class=\"s\">master<\/span>\r\n    <span class=\"na\">ports<\/span><span class=\"pi\">:<\/span>\r\n      <span class=\"pi\">-<\/span> <span class=\"s\">8081:8081<\/span>\r\n    <span class=\"na\">volumes<\/span><span class=\"pi\">:<\/span>\r\n      <span class=\"pi\">-<\/span> <span class=\"s\">spark_data:\/tmp\/data<\/span>\r\n\r\n  <span class=\"na\">worker2<\/span><span class=\"pi\">:<\/span>\r\n    <span class=\"na\">image<\/span><span class=\"pi\">:<\/span> <span class=\"s\">semantive\/spark:spark-2.1.1-hadoop-2.7.3<\/span>\r\n    <span class=\"na\">command<\/span><span class=\"pi\">:<\/span> <span class=\"s\">bin\/spark-class org.apache.spark.deploy.worker.Worker spark:\/\/master:7077<\/span>\r\n    <span class=\"na\">hostname<\/span><span class=\"pi\">:<\/span> <span class=\"s\">worker2<\/span>\r\n    <span class=\"na\">environment<\/span><span class=\"pi\">:<\/span>\r\n      <span class=\"na\">SPARK_CONF_DIR<\/span><span class=\"pi\">:<\/span> <span class=\"s\">\/conf<\/span>\r\n      <span class=\"na\">SPARK_WORKER_CORES<\/span><span class=\"pi\">:<\/span> <span class=\"m\">4<\/span>\r\n      <span class=\"na\">SPARK_WORKER_MEMORY<\/span><span class=\"pi\">:<\/span> <span class=\"s\">2g<\/span>\r\n      
<span class=\"na\">SPARK_WORKER_PORT<\/span><span class=\"pi\">:<\/span> <span class=\"m\">8882<\/span>\r\n      <span class=\"na\">SPARK_WORKER_WEBUI_PORT<\/span><span class=\"pi\">:<\/span> <span class=\"m\">8082<\/span>\r\n      <span class=\"na\">SPARK_PUBLIC_DNS<\/span><span class=\"pi\">:<\/span> <span class=\"s\">&lt;\u4eee\u60f3\u30de\u30b7\u30f3\u306e\u30d1\u30d6\u30ea\u30c3\u30afIP\u30a2\u30c9\u30ec\u30b9&gt;<\/span>\r\n    <span class=\"na\">depends_on<\/span><span class=\"pi\">:<\/span>\r\n      <span class=\"pi\">-<\/span> <span class=\"s\">master<\/span>\r\n    <span class=\"na\">ports<\/span><span class=\"pi\">:<\/span>\r\n      <span class=\"pi\">-<\/span> <span class=\"s\">8082:8082<\/span>\r\n    <span class=\"na\">volumes<\/span><span class=\"pi\">:<\/span>\r\n      <span class=\"pi\">-<\/span> <span class=\"s\">spark_data:\/tmp\/data<\/span>\r\n\r\n<span class=\"na\">volumes<\/span><span class=\"pi\">:<\/span>\r\n  <span class=\"na\">spark_data<\/span><span class=\"pi\">:<\/span>\r\n    <span class=\"na\">driver<\/span><span class=\"pi\">:<\/span> <span class=\"s\">local<\/span>\r\n<\/code><\/pre>\n<p>\u542f\u52a8Spark\u72ec\u7acb\u96c6\u7fa4\u3002<\/p>\n<pre class=\"post-pre\"><code>$ docker-compose up -d\r\n<\/code><\/pre>\n<p>\u6253\u5f00Spark Master UI\uff0c\u67e5\u770b\u96c6\u7fa4\u7684\u72b6\u6001\u3002<\/p>\n<pre class=\"post-pre\"><code>http:\/\/&lt;\u4eee\u60f3\u30de\u30b7\u30f3\u306e\u30d1\u30d6\u30ea\u30c3\u30afIP\u30a2\u30c9\u30ec\u30b9&gt;:8080\r\n<\/code><\/pre>\n<p>\u6267\u884cMaster\u5bb9\u5668\u4e2d\u7684spark-shell\uff0c\u68c0\u67e5Scala\u548cSpark\u7684\u7248\u672c\u3002Spark\u7684\u5f00\u53d1\u901f\u5ea6\u975e\u5e38\u5feb\uff0c\u9700\u8981\u7ecf\u5e38\u68c0\u67e5Scala\u7248\u672c\uff0c\u4ee5\u514d\u9047\u5230\u610f\u60f3\u4e0d\u5230\u7684\u9519\u8bef\u3002<\/p>\n<ul class=\"post-ul\">\n<li style=\"list-style-type: none;\">\n<ul class=\"post-ul\">Scala: 2.11.8<\/ul>\n<\/li>\n<\/ul>\n<p>&nbsp;<\/p>\n<ul class=\"post-ul\">Spark: 
2.1.1<\/ul>\n<pre class=\"post-pre\"><code>$ docker-compose exec master spark-shell\r\n...\r\nWelcome to\r\n      ____              __\r\n     \/ __\/__  ___ _____\/ \/__\r\n    _\\ \\\/ _ \\\/ _ `\/ __\/  '_\/\r\n   \/___\/ .__\/\\_,_\/_\/ \/_\/\\_\\   version 2.1.1\r\n      \/_\/\r\n\r\nUsing Scala version 2.11.8 (OpenJDK 64-Bit Server VM, Java 1.8.0_131)\r\nType in expressions to have them evaluated.\r\nType :help for more information.\r\n\r\nscala&gt;\r\n<\/code><\/pre>\n<h2>Jupyter<\/h2>\n<p>\u6211\u4eec\u5c06\u4f7f\u7528\u5b98\u65b9\u7684jupyter\/all-spark-notebook Docker\u955c\u50cf\u3002\u8fd9\u662f\u4e00\u4e2a\u5305\u542bScala\u548cSpark\u7684\u4e00\u7ad9\u5f0f\u955c\u50cf\u3002<\/p>\n<h3>Apache Toree<\/h3>\n<p>Apache Toree\u662f\u8fde\u63a5Jupyter\u5230Spark\u96c6\u7fa4\u7684\u5de5\u5177\u3002\u9664\u4e86PySpark\u5916\uff0c\u8fd8\u63d0\u4f9b\u4e86Scala\u3001SparkR\u548cSQL\u7684\u5185\u6838\u3002<\/p>\n<p>\u5f53\u67e5\u770bDockerfile\u65f6\uff0c\u53d1\u73b0\u5b89\u88c5\u4e86Apache Toree\u3002<\/p>\n<pre class=\"post-pre\"><code><span class=\"c\"># Apache Toree kernel<\/span>\r\n<span class=\"k\">RUN <\/span>pip <span class=\"nt\">--no-cache-dir<\/span> <span class=\"nb\">install <\/span>https:\/\/dist.apache.org\/repos\/dist\/dev\/incubator\/toree\/0.2.0\/snapshots\/dev1\/toree-pip\/toree-0.2.0.dev1.tar.gz\r\n<span class=\"k\">RUN <\/span>jupyter toree <span class=\"nb\">install<\/span> <span class=\"nt\">--sys-prefix<\/span>\r\n<\/code><\/pre>\n<h3>docker-compose.yml\u6587\u4ef6<\/h3>\n<p>\u5c06Jupyter\u670d\u52a1\u6dfb\u52a0\u5230Spark Standalone\u96c6\u7fa4\u7684docker-compose.yml\u6587\u4ef6\u4e2d\u3002<\/p>\n<pre class=\"post-pre\"><code>  <span class=\"na\">jupyter<\/span><span class=\"pi\">:<\/span>\r\n    <span class=\"na\">image<\/span><span class=\"pi\">:<\/span> <span class=\"s\">jupyter\/all-spark-notebook:c1b0cf6bf4d6<\/span>\r\n    <span class=\"na\">depends_on<\/span><span class=\"pi\">:<\/span>\r\n      <span 
class=\"pi\">-<\/span> <span class=\"s\">master<\/span>\r\n    <span class=\"na\">ports<\/span><span class=\"pi\">:<\/span>\r\n      <span class=\"pi\">-<\/span> <span class=\"s\">8888:8888<\/span>\r\n    <span class=\"na\">volumes<\/span><span class=\"pi\">:<\/span>\r\n      <span class=\"pi\">-<\/span> <span class=\"s\">.\/notebooks:\/home\/jovyan\/work<\/span>\r\n      <span class=\"pi\">-<\/span> <span class=\"s\">.\/ivy2:\/home\/jovyan\/.ivy2<\/span>\r\n    <span class=\"na\">env_file<\/span><span class=\"pi\">:<\/span>\r\n      <span class=\"pi\">-<\/span> <span class=\"s\">.\/.env<\/span>\r\n    <span class=\"na\">environment<\/span><span class=\"pi\">:<\/span>\r\n      <span class=\"na\">TINI_SUBREAPER<\/span><span class=\"pi\">:<\/span> <span class=\"s1\">'<\/span><span class=\"s\">true'<\/span>\r\n      <span class=\"na\">SPARK_OPTS<\/span><span class=\"pi\">:<\/span> <span class=\"s\">--master spark:\/\/master:7077 --deploy-mode client --packages com.amazonaws:aws-java-sdk:1.7.4,org.apache.hadoop:hadoop-aws:2.7.3<\/span>\r\n    <span class=\"na\">command<\/span><span class=\"pi\">:<\/span> <span class=\"s\">start-notebook.sh --NotebookApp.password=sha1:xxx --NotebookApp.iopub_data_rate_limit=10000000<\/span>\r\n<\/code><\/pre>\n<h2>\u5173\u4e8eJupyter\u670d\u52a1\u7684\u9009\u9879<\/h2>\n<p>\u5728Spark Standalone\u96c6\u7fa4\u4e2d\uff0c\u7531\u4e8e\u6ca1\u6709\u4f7f\u7528Hadoop\uff0c\u56e0\u6b64\u6211\u4eec\u6dfb\u52a0\u4e86\u914d\u7f6e\u4ee5\u4f7f\u7528Amazon S3\u4f5c\u4e3a\u5206\u5e03\u5f0f\u6587\u4ef6\u7cfb\u7edf\u3002\u8fd9\u5bf9\u4e8e\u4fdd\u5b58\u793a\u4f8b\u6570\u636e\u548cParquet\u6587\u4ef6\u975e\u5e38\u65b9\u4fbf\u3002<\/p>\n<h3>\u955c\u50cf\uff08image\uff09<\/h3>\n<p>jupyter\/all-spark-notebook\u955c\u50cf\u7ecf\u5e38\u66f4\u65b0\u3002 \u5982\u679cApache Toree\u4f7f\u7528\u7684Spark\u4e0eSpark\u96c6\u7fa4\u7684\u7248\u672c\u4e0d\u4e00\u81f4\uff0c\u5c31\u4f1a\u51fa\u9519\u800c\u65e0\u6cd5\u542f\u52a8\u3002 
\u7531\u4e8e\u672c\u6b21Spark\u96c6\u7fa4\u7684\u7248\u672c\u662f2.1.1\uff0c\u56e0\u6b64\u5c06\u6307\u5b9a\u76f8\u540c\u7248\u672c\u7684\u56fe\u50cf\u6807\u7b7e\u3002 \u5bf9\u4e8ejupyter \/ all-spark-notebook\u56fe\u50cf\uff0c\u53ea\u77e5\u9053ID\u800c\u4e0d\u77e5\u9053\u6807\u7b7e\u662f\u4e0d\u65b9\u4fbf\u7684\u3002<\/p>\n<p>\u7531\u4e8eSpark\u7248\u672c\u5df2\u7ecf\u5347\u7ea7\u5230\u4e862.2.0\uff0c\u6240\u4ee5\u6211\u4eec\u4f1a\u4f7f\u75282.1.1\u7684\u6807\u7b7e\u3002<br \/>\n\u6211\u4eec\u5c06\u62c9\u53d6\u6807\u7b7e\u4e3aDocker\u955c\u50cf\u5e76\u5728spark-shell\u4e2d\u8fdb\u884c\u786e\u8ba4\u3002<\/p>\n<pre class=\"post-pre\"><code>$ docker pull jupyter\/all-spark-notebook:c1b0cf6bf4d6\r\n$ docker run -it --rm \\\r\n  jupyter\/all-spark-notebook:c1b0cf6bf4d6 \\\r\n  \/usr\/local\/spark-2.1.1-bin-hadoop2.7\/bin\/spark-shell\r\n<\/code><\/pre>\n<p>\u786e\u8ba4\u4e86Spark\u96c6\u7fa4\u3001Spark\u548cScala\u7684\u7248\u672c\u76f8\u540c\u3002<\/p>\n<pre class=\"post-pre\"><code>Welcome to\r\n      ____              __\r\n     \/ __\/__  ___ _____\/ \/__\r\n    _\\ \\\/ _ \\\/ _ `\/ __\/  '_\/\r\n   \/___\/ .__\/\\_,_\/_\/ \/_\/\\_\\   version 2.1.1\r\n      \/_\/\r\n\r\nUsing Scala version 2.11.8 (OpenJDK 64-Bit Server VM, Java 1.8.0_131)\r\nType in expressions to have them evaluated.\r\nType :help for more information.\r\n\r\nscala&gt;\r\n<\/code><\/pre>\n<p>\u6211\u4e5f\u4f1a\u68c0\u67e5Jupyter\u7684\u7248\u672c\u3002<\/p>\n<pre class=\"post-pre\"><code>$ docker run -it --rm jupyter\/all-spark-notebook:c1b0cf6bf4d6 jupyter --version\r\n4.3.0\r\n<\/code><\/pre>\n<h3>TINI_SUBREAPER \u548c SPARK_OPTS<\/h3>\n<p>\u8fde\u63a5\u5230\u8fdc\u7a0bSpark\u6240\u9700\u7684\u4e24\u4e2a\u5fc5\u8981\u914d\u7f6e\u662f\u4f7f\u7528Apache 
Toree\u4eceJupyter\u4e2d\u3002TINI_SUBREAPER\u73af\u5883\u53d8\u91cf\u5c06\u5728init\u4e2d\u4f7f\u7528Tini\u3002<\/p>\n<p>\u5982\u679c\u5728Spark\u4e2d\u4e0d\u4f7f\u7528\u989d\u5916\u7684Jar\u6587\u4ef6\uff0c\u53ea\u9700\u5728SPARK_OPTS\u73af\u5883\u53d8\u91cf\u4e2d\u6307\u5b9a\u4ee5\u4e0b\u5185\u5bb9\u5373\u53ef\u8fde\u63a5\u5230\u8fdc\u7a0b\u7684Spark Standalone\u96c6\u7fa4\u3002\u8fd9\u4e0e\u901a\u5e38\u7684spark-submit\u9009\u9879\u76f8\u540c\u3002<\/p>\n<pre class=\"post-pre\"><code>--master spark:\/\/master:7077 --deploy-mode client\r\n<\/code><\/pre>\n<p>\u5982\u679c\u6709\u989d\u5916\u7684Jar\u6587\u4ef6\uff0c\u5219\u9700\u8981\u6dfb\u52a0--packages\u6807\u5fd7\u3002\u8fd9\u662f\u8fde\u63a5\u5230\u4e9a\u9a6c\u900aS3\u6240\u9700\u7684\u5305\u3002<\/p>\n<pre class=\"post-pre\"><code>--packages com.amazonaws:aws-java-sdk:1.7.4,org.apache.hadoop:hadoop-aws:2.7.3\r\n<\/code><\/pre>\n<h3>--NotebookApp.iopub_data_rate_limit<\/h3>\n<p>\u4f7f\u7528Bokeh\u7b49\u53ef\u89c6\u5316\u5de5\u5177\u5904\u7406\u5927\u578b\u56fe\u50cf\u65f6\uff0c\u9700\u8981\u5728Jupyter\u7684\u542f\u52a8\u811a\u672c\u4e2d\u6307\u5b9a\u9009\u9879\u3002<\/p>\n<ul class=\"post-ul\">IOPub data rate exceeded when viewing image in Jupyter notebook<\/ul>\n<h3>--NotebookApp.password<\/h3>\n<p>Jupyter\u7684\u8eab\u4efd\u9a8c\u8bc1\u65b9\u6cd5\u9ed8\u8ba4\u662f\u4ee4\u724c\u3002\u5982\u679c\u9891\u7e41\u5730\u542f\u52a8\u548c\u4e22\u5f03\u50cfDocker\u5bb9\u5668\u4e00\u6837\u7684\u5bf9\u8c61\uff0c\u6bcf\u6b21\u90fd\u8981\u8f93\u5165\u4e0d\u540c\u7684\u4ee4\u724c\u662f\u5f88\u9ebb\u70e6\u7684\uff0c\u56e0\u6b64\u6211\u4eec\u5c06\u5176\u66f4\u6539\u4e3a\u5bc6\u7801\u9a8c\u8bc1\u3002\u6211\u4eec\u4f7f\u7528ipython\u6765\u83b7\u53d6\u5bc6\u7801\u7684\u54c8\u5e0c\u503c\u3002<\/p>\n<pre class=\"post-pre\"><code><span class=\"err\">$<\/span> <span class=\"n\">docker<\/span> <span class=\"n\">run<\/span> 
<span class=\"o\">-<\/span><span class=\"n\">it<\/span> <span class=\"o\">--<\/span><span class=\"n\">rm<\/span> <span class=\"n\">jupyter<\/span><span class=\"o\">\/<\/span><span class=\"nb\">all<\/span><span class=\"o\">-<\/span><span class=\"n\">spark<\/span><span class=\"o\">-<\/span><span class=\"n\">notebook<\/span><span class=\"p\">:<\/span><span class=\"n\">c1b0cf6bf4d6<\/span> <span class=\"n\">ipython<\/span>\r\n<span class=\"n\">Python<\/span> <span class=\"mf\">3.6<\/span><span class=\"p\">.<\/span><span class=\"mi\">1<\/span> <span class=\"o\">|<\/span> <span class=\"n\">packaged<\/span> <span class=\"n\">by<\/span> <span class=\"n\">conda<\/span><span class=\"o\">-<\/span><span class=\"n\">forge<\/span> <span class=\"o\">|<\/span> <span class=\"p\">(<\/span><span class=\"n\">default<\/span><span class=\"p\">,<\/span> <span class=\"n\">May<\/span> <span class=\"mi\">23<\/span> <span class=\"mi\">2017<\/span><span class=\"p\">,<\/span> <span class=\"mi\">14<\/span><span class=\"p\">:<\/span><span class=\"mi\">16<\/span><span class=\"p\">:<\/span><span class=\"mi\">20<\/span><span class=\"p\">)<\/span>\r\n<span class=\"n\">Type<\/span> <span class=\"s\">'copyright'<\/span><span class=\"p\">,<\/span> <span class=\"s\">'credits'<\/span> <span class=\"ow\">or<\/span> <span class=\"s\">'license'<\/span> <span class=\"k\">for<\/span> <span class=\"n\">more<\/span> <span class=\"n\">information<\/span>\r\n<span class=\"n\">IPython<\/span> <span class=\"mf\">6.1<\/span><span class=\"p\">.<\/span><span class=\"mi\">0<\/span> <span class=\"o\">--<\/span> <span class=\"n\">An<\/span> <span class=\"n\">enhanced<\/span> <span class=\"n\">Interactive<\/span> <span class=\"n\">Python<\/span><span class=\"p\">.<\/span> <span class=\"n\">Type<\/span> <span class=\"s\">'?'<\/span> <span class=\"k\">for<\/span> <span class=\"n\">help<\/span><span 
class=\"p\">.<\/span>\r\n<\/code><\/pre>\n<p>\u751f\u6210\u5bc6\u7801\u7684\u65b9\u5f0f\u5982\u4e0b\u3002\u5c06\u751f\u6210\u7684\u54c8\u5e0c\u503c\u4f5c\u4e3aJupyter\u542f\u52a8\u9009\u9879\u7684\u6307\u5b9a\u503c\u3002<\/p>\n<pre class=\"post-pre\"><code>In [1]: from notebook.auth import passwd\r\nIn [2]: passwd()\r\n\r\nEnter password:\r\nVerify password:\r\nOut[2]: 'sha1:xxx'\r\n<\/code><\/pre>\n<h3>volumes<\/h3>\n<p>\/home\/jovyan\u662f\u8fd0\u884cJupyter\u5bb9\u5668\u7684\u7528\u6237\u7684\u4e3b\u76ee\u5f55\u3002 \u7b14\u8bb0\u672c\u548c\u4e0b\u8f7d\u7684Jar\u6587\u4ef6\u5c06\u88ab\u6302\u8f7d\u5230Docker\u4e3b\u673a\u4e0a\u3002<\/p>\n<h3>\u73af\u5883\u6587\u4ef6<\/h3>\n<p>\u5c06\u73af\u5883\u53d8\u91cf\u5199\u5165.env\u6587\u4ef6\u5e76\u4f20\u9012\u7ed9\u5bb9\u5668\u3002\u8bf7\u6307\u5b9a\u7528\u4e8e\u8fde\u63a5\u4e9a\u9a6c\u900aS3\u7684\u8bbf\u95ee\u5bc6\u94a5\u548c\u79d8\u5bc6\u5bc6\u94a5\u3002<\/p>\n<pre class=\"post-pre\"><code>AWS_ACCESS_KEY_ID=xxx\r\nAWS_SECRET_ACCESS_KEY=xxx\r\n<\/code><\/pre>\n<p>\u4e0d\u8981\u5fd8\u8bb0\u5c06.env\u6dfb\u52a0\u5230.gitignore\u6587\u4ef6\u4e2d\uff0c\u4ee5\u907f\u514d\u63d0\u4ea4\u5230Git\u4e2d\u3002<\/p>\n<pre class=\"post-pre\"><code>.env\r\n<\/code><\/pre>\n<h2>\u4f7f\u7528 Jupyter \u6765\u64cd\u4f5c Spark \u548c Amazon S3\u3002<\/h2>\n<p>\u6211\u6253\u7b97\u7528Scala\u548cPython\u5728Jupyter\u4e2d\u7f16\u5199\u4e00\u4e2a\u4f7f\u7528Spark\u548cAmazon S3\u7684\u793a\u4f8b\u3002\u6211\u4eec\u5c06\u4f7f\u7528\u6587\u7ae0\u300a\u4f7f\u7528Apache APIs\u76d1\u63a7\u5b9e\u65f6Uber\u6570\u636e\uff0c\u7b2c\u4e00\u90e8\u5206\uff1aSpark\u673a\u5668\u5b66\u4e60\u300b\u4e2d\u4f7f\u7528\u7684Uber\u63a5\u9001\u6570\u636e\u4f5c\u4e3a\u793a\u4f8b\u3002\u8fd9\u91cc\u53ea\u662f\u7b80\u5355\u5730\u4eceS3\u8bfb\u53d6CSV\u6587\u4ef6\u5e76\u663e\u793a\u3002<\/p>\n<p>\u542f\u52a8docker-compose.yml\u4e2d\u5b9a\u4e49\u7684\u6240\u6709\u670d\u52a1\u3002<\/p>\n<pre class=\"post-pre\"><code>$ docker-compose up 
-d\r\n<\/code><\/pre>\n<p>\u4f7f\u7528\u6d4f\u89c8\u5668\u6253\u5f00Jupyter\u5e76\u4f7f\u7528\u521a\u521a\u521b\u5efa\u7684\u5bc6\u7801\u767b\u5f55\u3002<\/p>\n<pre class=\"post-pre\"><code>http:\/\/&lt;\u4eee\u60f3\u30de\u30b7\u30f3\u306e\u30d1\u30d6\u30ea\u30c3\u30afIP\u30a2\u30c9\u30ec\u30b9&gt;:8888\r\n<\/code><\/pre>\n<h3>\u6570\u636e\u51c6\u5907<\/h3>\n<p>\u5728\u514b\u9686\u5b58\u50a8\u5e93\u540e\uff0c\u4f7f\u7528s3cmd\u5c06uber.csv\u6587\u4ef6\u653e\u5165\u4efb\u610f\u7684\u5b58\u50a8\u6876\u4e2d\u3002<\/p>\n<pre class=\"post-pre\"><code>$ git clone https:\/\/github.com\/caroljmcdonald\/spark-ml-kmeans-uber\r\n$ cd spark-ml-kmeans-uber\/data\r\n$ s3cmd put uber.csv s3:\/\/&lt;\u30d0\u30b1\u30c3\u30c8\u540d&gt;\/uber-csv\/\r\n<\/code><\/pre>\n<h3>Scala (\u65af\u5361\u62c9) is a programming language.<\/h3>\n<p>\u60a8\u53ef\u4ee5\u5c06\u4ee5\u4e0b\u4ee3\u7801\u5212\u5206\u4e3a\u5355\u5143\u683c\uff0c\u5e76\u4ee5\u4ea4\u4e92\u65b9\u5f0f\u6267\u884c\u4ee5\u786e\u8ba4\u3002\u5982\u679c\u60a8\u8981\u7f16\u5199Scala\u7b14\u8bb0\u672c\uff0c\u8bf7\u4ece\u53f3\u4e0a\u65b9\u7684\u65b0\u5efa\u6309\u94ae\u4e2d\u9009\u62e9Apache Toree &#8211; Scala\u3002<\/p>\n<pre class=\"post-pre\"><code><span class=\"k\">import<\/span> <span class=\"nn\">org.apache.spark.sql.SparkSession<\/span>\r\n\r\n<span class=\"k\">val<\/span> <span class=\"nv\">spark<\/span> <span class=\"k\">=<\/span> <span class=\"nc\">SparkSession<\/span><span class=\"o\">.<\/span>\r\n    <span class=\"n\">builder<\/span><span class=\"o\">.<\/span>\r\n    <span class=\"nf\">getOrCreate<\/span><span class=\"o\">()<\/span>\r\n\r\n<span class=\"nv\">sc<\/span><span class=\"o\">.<\/span><span class=\"py\">hadoopConfiguration<\/span><span class=\"o\">.<\/span><span class=\"py\">set<\/span><span class=\"o\">(<\/span><span class=\"s\">\"fs.s3a.impl\"<\/span><span class=\"o\">,<\/span> <span class=\"s\">\"org.apache.hadoop.fs.s3a.S3AFileSystem\"<\/span><span class=\"o\">)<\/span>\r\n<span class=\"nv\">sc<\/span><span 
class=\"o\">.<\/span><span class=\"py\">hadoopConfiguration<\/span><span class=\"o\">.<\/span><span class=\"py\">set<\/span><span class=\"o\">(<\/span><span class=\"s\">\"fs.s3a.fast.upload\"<\/span><span class=\"o\">,<\/span> <span class=\"s\">\"true\"<\/span><span class=\"o\">)<\/span>\r\n\r\n<span class=\"k\">import<\/span> <span class=\"nn\">org.apache.spark.sql.types._<\/span>\r\n<span class=\"k\">import<\/span> <span class=\"nn\">org.apache.spark.sql.functions._<\/span>\r\n\r\n<span class=\"k\">val<\/span> <span class=\"nv\">schema<\/span> <span class=\"k\">=<\/span> <span class=\"nc\">StructType<\/span><span class=\"o\">(<\/span>\r\n    <span class=\"nc\">StructField<\/span><span class=\"o\">(<\/span><span class=\"s\">\"dt\"<\/span><span class=\"o\">,<\/span> <span class=\"nc\">TimestampType<\/span><span class=\"o\">,<\/span> <span class=\"kc\">true<\/span><span class=\"o\">)<\/span> <span class=\"o\">::<\/span>\r\n    <span class=\"nc\">StructField<\/span><span class=\"o\">(<\/span><span class=\"s\">\"lat\"<\/span><span class=\"o\">,<\/span> <span class=\"nc\">DoubleType<\/span><span class=\"o\">,<\/span> <span class=\"kc\">true<\/span><span class=\"o\">)<\/span> <span class=\"o\">::<\/span>\r\n    <span class=\"nc\">StructField<\/span><span class=\"o\">(<\/span><span class=\"s\">\"lon\"<\/span><span class=\"o\">,<\/span> <span class=\"nc\">DoubleType<\/span><span class=\"o\">,<\/span> <span class=\"kc\">true<\/span><span class=\"o\">)<\/span> <span class=\"o\">::<\/span>\r\n    <span class=\"nc\">StructField<\/span><span class=\"o\">(<\/span><span class=\"s\">\"base\"<\/span><span class=\"o\">,<\/span> <span class=\"nc\">StringType<\/span><span class=\"o\">,<\/span> <span class=\"kc\">true<\/span><span class=\"o\">)<\/span> <span class=\"o\">::<\/span> <span class=\"nc\">Nil<\/span>\r\n<span class=\"o\">)<\/span>\r\n\r\n<span class=\"k\">val<\/span> <span class=\"nv\">df<\/span> <span class=\"k\">=<\/span> \r\n    <span class=\"nv\">spark<\/span><span 
class=\"o\">.<\/span><span class=\"py\">read<\/span><span class=\"o\">.<\/span>\r\n    <span class=\"nf\">option<\/span><span class=\"o\">(<\/span><span class=\"s\">\"header\"<\/span><span class=\"o\">,<\/span> <span class=\"kc\">false<\/span><span class=\"o\">).<\/span>\r\n    <span class=\"nf\">schema<\/span><span class=\"o\">(<\/span><span class=\"n\">schema<\/span><span class=\"o\">).<\/span>\r\n    <span class=\"nf\">csv<\/span><span class=\"o\">(<\/span><span class=\"s\">\"s3a:\/\/&lt;\u30d0\u30b1\u30c3\u30c8\u540d&gt;\/uber-csv\/uber.csv\"<\/span><span class=\"o\">)<\/span>\r\n\r\n<span class=\"nv\">df<\/span><span class=\"o\">.<\/span><span class=\"py\">printSchema<\/span>\r\n\r\n<span class=\"nv\">df<\/span><span class=\"o\">.<\/span><span class=\"py\">cache<\/span>\r\n<span class=\"nv\">df<\/span><span class=\"o\">.<\/span><span class=\"py\">show<\/span><span class=\"o\">(<\/span><span class=\"kc\">false<\/span><span class=\"o\">)<\/span>\r\n<\/code><\/pre>\n<p>\u5728Scala\u4e2d\uff0c\u53ef\u4ee5\u8fd9\u6837\u5199StructType\u7684\u6a21\u5f0f\u3002<\/p>\n<pre class=\"post-pre\"><code><span class=\"k\">val<\/span> <span class=\"nv\">schema<\/span> <span class=\"k\">=<\/span> <span class=\"o\">(<\/span><span class=\"k\">new<\/span> <span class=\"nc\">StructType<\/span><span class=\"o\">).<\/span>\r\n    <span class=\"nf\">add<\/span><span class=\"o\">(<\/span><span class=\"s\">\"dt\"<\/span><span class=\"o\">,<\/span> <span class=\"s\">\"timestamp\"<\/span><span class=\"o\">,<\/span> <span class=\"kc\">true<\/span><span class=\"o\">).<\/span>\r\n    <span class=\"nf\">add<\/span><span class=\"o\">(<\/span><span class=\"s\">\"lat\"<\/span><span class=\"o\">,<\/span> <span class=\"s\">\"double\"<\/span><span class=\"o\">,<\/span> <span class=\"kc\">true<\/span><span class=\"o\">).<\/span>\r\n    <span class=\"nf\">add<\/span><span class=\"o\">(<\/span><span class=\"s\">\"lon\"<\/span><span class=\"o\">,<\/span> <span class=\"s\">\"double\"<\/span><span 
class=\"o\">,<\/span> <span class=\"kc\">true<\/span><span class=\"o\">).<\/span>\r\n    <span class=\"nf\">add<\/span><span class=\"o\">(<\/span><span class=\"s\">\"base\"<\/span><span class=\"o\">,<\/span> <span class=\"s\">\"string\"<\/span><span class=\"o\">,<\/span> <span class=\"kc\">true<\/span><span class=\"o\">)<\/span>\r\n<\/code><\/pre>\n<p>\u8fd9\u662f\u6700\u540e\u7684df.show(false)\u7684\u8f93\u51fa\u7ed3\u679c\u3002<\/p>\n<pre class=\"post-pre\"><code>+---------------------+-------+--------+------+\r\n|dt                   |lat    |lon     |base  |\r\n+---------------------+-------+--------+------+\r\n|2014-08-01 00:00:00.0|40.729 |-73.9422|B02598|\r\n|2014-08-01 00:00:00.0|40.7476|-73.9871|B02598|\r\n|2014-08-01 00:00:00.0|40.7424|-74.0044|B02598|\r\n|2014-08-01 00:00:00.0|40.751 |-73.9869|B02598|\r\n|2014-08-01 00:00:00.0|40.7406|-73.9902|B02598|\r\n|2014-08-01 00:00:00.0|40.6994|-73.9591|B02617|\r\n|2014-08-01 00:00:00.0|40.6917|-73.9398|B02617|\r\n|2014-08-01 00:00:00.0|40.7063|-73.9223|B02617|\r\n|2014-08-01 00:00:00.0|40.6759|-74.0168|B02617|\r\n|2014-08-01 00:00:00.0|40.7617|-73.9847|B02617|\r\n|2014-08-01 00:00:00.0|40.6969|-73.9064|B02617|\r\n|2014-08-01 00:00:00.0|40.7623|-73.9751|B02617|\r\n|2014-08-01 00:00:00.0|40.6982|-73.9669|B02617|\r\n|2014-08-01 00:00:00.0|40.7553|-73.9253|B02617|\r\n|2014-08-01 00:00:00.0|40.7325|-73.9876|B02682|\r\n|2014-08-01 00:00:00.0|40.6754|-74.017 |B02682|\r\n|2014-08-01 00:00:00.0|40.7303|-74.0029|B02682|\r\n|2014-08-01 00:00:00.0|40.7218|-73.9973|B02682|\r\n|2014-08-01 00:00:00.0|40.7134|-74.0091|B02682|\r\n|2014-08-01 00:00:00.0|40.7194|-73.9964|B02682|\r\n+---------------------+-------+--------+------+\r\nonly showing top 20 rows\r\n<\/code><\/pre>\n<h3>Python 
\u662f\u4e00\u79cd\u9ad8\u7ea7\u7a0b\u5e8f\u8bbe\u8ba1\u8bed\u8a00\uff0c\u5b83\u88ab\u5e7f\u6cdb\u7528\u4e8e\u8f6f\u4ef6\u5f00\u53d1\u3001\u6570\u636e\u5206\u6790\u548c\u4eba\u5de5\u667a\u80fd\u7b49\u9886\u57df\u3002<\/h3>\n<p>\u5982\u679c\u8981\u7f16\u5199Python 3\u7684Notebook\uff0c\u8bf7\u4ece\u53f3\u4e0a\u89d2\u7684&#8221;New&#8221;\u6309\u94ae\u4e2d\u9009\u62e9Python 3\u3002\u5728\u4e00\u4e2a\u5408\u9002\u7684\u4f4d\u7f6e\u5c06\u4ee5\u4e0b\u4ee3\u7801\u5206\u5272\u6210\u5355\u5143\u683c\u5e76\u6267\u884c\u3002\u4e0eScala\u4e0d\u540c\u7684\u662f\uff0c\u989d\u5916\u7684Jar\u6587\u4ef6\u5e94\u8be5\u5728PYSPARK_SUBMIT_ARGS\u73af\u5883\u53d8\u91cf\u4e2d\u6307\u5b9a\u3002<\/p>\n<p>\u4ee5\u4e0b\u662f\u4f7f\u7528Python\u7f16\u5199Spark\u5e94\u7528\u7a0b\u5e8f\u7684\u65b9\u6cd5\uff0c\u51e0\u4e4e\u4e0eScala\u76f8\u540c\u3002<\/p>\n<pre class=\"post-pre\"><code><span class=\"kn\">import<\/span> <span class=\"nn\">os<\/span>\r\n<span class=\"n\">os<\/span><span class=\"p\">.<\/span><span class=\"n\">environ<\/span><span class=\"p\">[<\/span><span class=\"s\">'PYSPARK_SUBMIT_ARGS'<\/span><span class=\"p\">]<\/span> <span class=\"o\">=<\/span> <span class=\"s\">'--packages com.amazonaws:aws-java-sdk:1.7.4,org.apache.hadoop:hadoop-aws:2.7.3 pyspark-shell'<\/span>\r\n\r\n<span class=\"kn\">from<\/span> <span class=\"nn\">pyspark.sql<\/span> <span class=\"kn\">import<\/span> <span class=\"n\">SparkSession<\/span>\r\n\r\n<span class=\"n\">spark<\/span> <span class=\"o\">=<\/span> <span class=\"p\">(<\/span>\r\n    <span class=\"n\">SparkSession<\/span><span class=\"p\">.<\/span><span class=\"n\">builder<\/span>\r\n        <span class=\"p\">.<\/span><span class=\"n\">getOrCreate<\/span><span class=\"p\">()<\/span>\r\n<span class=\"p\">)<\/span>\r\n\r\n<span class=\"n\">sc<\/span> <span class=\"o\">=<\/span> <span class=\"n\">spark<\/span><span class=\"p\">.<\/span><span class=\"n\">sparkContext<\/span>\r\n\r\n<span class=\"n\">sc<\/span><span class=\"p\">.<\/span><span 
class=\"n\">_jsc<\/span><span class=\"p\">.<\/span><span class=\"n\">hadoopConfiguration<\/span><span class=\"p\">().<\/span><span class=\"nb\">set<\/span><span class=\"p\">(<\/span><span class=\"s\">\"fs.s3a.impl\"<\/span><span class=\"p\">,<\/span> <span class=\"s\">\"org.apache.hadoop.fs.s3a.S3AFileSystem\"<\/span><span class=\"p\">)<\/span>\r\n<span class=\"n\">sc<\/span><span class=\"p\">.<\/span><span class=\"n\">_jsc<\/span><span class=\"p\">.<\/span><span class=\"n\">hadoopConfiguration<\/span><span class=\"p\">().<\/span><span class=\"nb\">set<\/span><span class=\"p\">(<\/span><span class=\"s\">\"fs.s3a.fast.upload\"<\/span><span class=\"p\">,<\/span> <span class=\"s\">\"true\"<\/span><span class=\"p\">)<\/span>\r\n\r\n<span class=\"kn\">from<\/span> <span class=\"nn\">pyspark.sql.types<\/span> <span class=\"kn\">import<\/span> <span class=\"o\">*<\/span>\r\n<span class=\"kn\">from<\/span> <span class=\"nn\">pyspark.sql.functions<\/span> <span class=\"kn\">import<\/span> <span class=\"o\">*<\/span>\r\n\r\n<span class=\"n\">schema<\/span> <span class=\"o\">=<\/span> <span class=\"n\">StructType<\/span><span class=\"p\">([<\/span>\r\n    <span class=\"n\">StructField<\/span><span class=\"p\">(<\/span><span class=\"s\">\"dt\"<\/span><span class=\"p\">,<\/span> <span class=\"n\">TimestampType<\/span><span class=\"p\">(),<\/span> <span class=\"bp\">True<\/span><span class=\"p\">),<\/span>\r\n    <span class=\"n\">StructField<\/span><span class=\"p\">(<\/span><span class=\"s\">\"lat\"<\/span><span class=\"p\">,<\/span> <span class=\"n\">DoubleType<\/span><span class=\"p\">(),<\/span> <span class=\"bp\">True<\/span><span class=\"p\">),<\/span>\r\n    <span class=\"n\">StructField<\/span><span class=\"p\">(<\/span><span class=\"s\">\"lon\"<\/span><span class=\"p\">,<\/span> <span class=\"n\">DoubleType<\/span><span class=\"p\">(),<\/span> <span class=\"bp\">True<\/span><span class=\"p\">),<\/span>\r\n    <span class=\"n\">StructField<\/span><span 
class=\"p\">(<\/span><span class=\"s\">\"base\"<\/span><span class=\"p\">,<\/span> <span class=\"n\">StringType<\/span><span class=\"p\">(),<\/span> <span class=\"bp\">True<\/span><span class=\"p\">)<\/span>\r\n<span class=\"p\">])<\/span>\r\n\r\n<span class=\"n\">df<\/span> <span class=\"o\">=<\/span> <span class=\"p\">(<\/span>\r\n    <span class=\"n\">spark<\/span><span class=\"p\">.<\/span><span class=\"n\">read<\/span>\r\n    <span class=\"p\">.<\/span><span class=\"n\">option<\/span><span class=\"p\">(<\/span><span class=\"s\">\"header\"<\/span><span class=\"p\">,<\/span> <span class=\"bp\">False<\/span><span class=\"p\">)<\/span>\r\n    <span class=\"p\">.<\/span><span class=\"n\">schema<\/span><span class=\"p\">(<\/span><span class=\"n\">schema<\/span><span class=\"p\">)<\/span>\r\n    <span class=\"p\">.<\/span><span class=\"n\">csv<\/span><span class=\"p\">(<\/span><span class=\"s\">\"s3a:\/\/&lt;\u5b58\u50a8\u6876\u540d\u79f0&gt;\/uber-csv\/uber.csv\"<\/span><span class=\"p\">)<\/span>\r\n<span class=\"p\">)<\/span>\r\n\r\n<span class=\"n\">df<\/span><span class=\"p\">.<\/span><span class=\"n\">printSchema<\/span><span class=\"p\">()<\/span>\r\n\r\n<span class=\"n\">df<\/span><span class=\"p\">.<\/span><span class=\"n\">cache<\/span><span class=\"p\">()<\/span>\r\n<span class=\"n\">df<\/span><span class=\"p\">.<\/span><span class=\"n\">show<\/span><span class=\"p\">(<\/span><span class=\"n\">truncate<\/span><span class=\"o\">=<\/span><span class=\"bp\">False<\/span><span class=\"p\">)<\/span>\r\n<\/code><\/pre>\n<p>\u6700\u540e\u4e00\u884cdf.show(truncate=False)\u7684\u8f93\u51fa\u7ed3\u679c\u4e0e\u4e4b\u524d\u7684Scala\u4ee3\u7801\u76f8\u540c\u3002<\/p>\n<pre class=\"post-pre\"><code>+---------------------+-------+--------+------+\r\n|dt                   |lat    |lon     |base  |\r\n+---------------------+-------+--------+------+\r\n|2014-08-01 00:00:00.0|40.729 |-73.9422|B02598|\r\n|2014-08-01 00:00:00.0|40.7476|-73.9871|B02598|\r\n|2014-08-01 
00:00:00.0|40.7424|-74.0044|B02598|\r\n|2014-08-01 00:00:00.0|40.751 |-73.9869|B02598|\r\n|2014-08-01 00:00:00.0|40.7406|-73.9902|B02598|\r\n|2014-08-01 00:00:00.0|40.6994|-73.9591|B02617|\r\n|2014-08-01 00:00:00.0|40.6917|-73.9398|B02617|\r\n|2014-08-01 00:00:00.0|40.7063|-73.9223|B02617|\r\n|2014-08-01 00:00:00.0|40.6759|-74.0168|B02617|\r\n|2014-08-01 00:00:00.0|40.7617|-73.9847|B02617|\r\n|2014-08-01 00:00:00.0|40.6969|-73.9064|B02617|\r\n|2014-08-01 00:00:00.0|40.7623|-73.9751|B02617|\r\n|2014-08-01 00:00:00.0|40.6982|-73.9669|B02617|\r\n|2014-08-01 00:00:00.0|40.7553|-73.9253|B02617|\r\n|2014-08-01 00:00:00.0|40.7325|-73.9876|B02682|\r\n|2014-08-01 00:00:00.0|40.6754|-74.017 |B02682|\r\n|2014-08-01 00:00:00.0|40.7303|-74.0029|B02682|\r\n|2014-08-01 00:00:00.0|40.7218|-73.9973|B02682|\r\n|2014-08-01 00:00:00.0|40.7134|-74.0091|B02682|\r\n|2014-08-01 00:00:00.0|40.7194|-73.9964|B02682|\r\n+---------------------+-------+--------+------+\r\nonly showing top 20 rows\r\n<\/code><\/pre>\n","protected":false},"excerpt":{"rendered":"<p>\u6211\u51c6\u5907\u914d\u7f6e\u4e00\u4e2aSpark\u96c6\u7fa4\uff0c\u5e76\u7f16\u5199\u4e00\u4e9b\u793a\u4f8b\u4ee3\u7801\u3002\u6211\u8ba4\u4e3a\u5f88\u591a\u4eba\u90fd\u5c06Jupyter\u4f5c\u4e3aPython\u6570\u636e\u5206\u6790\u548c\u673a\u5668 [&hellip;]<\/p>\n","protected":false},"author":6,"featured_media":0,"comment_status":"closed","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[1],"tags":[],"class_list":["post-47297","post","type-post","status-publish","format-standard","hentry","category-uncategorized"],"yoast_head":"<!-- This site is optimized with the Yoast SEO Premium plugin v21.5 (Yoast SEO v21.5) - https:\/\/yoast.com\/wordpress\/plugins\/seo\/ -->\n<title>\u4f7f\u7528Python\u8fde\u63a5SensorTag\u3001Kafka\u548cSpark Streaming\u8fdb\u884c\u6d41\u5904\u7406 - \u7b2c5\u90e8\u5206\uff1a\u901a\u8fc7Apache Toree\u5728Jupyter\u4e0a\u8fde\u63a5Spark - Blog - Silicon 
Cloud<\/title>\n<meta name=\"robots\" content=\"index, follow, max-snippet:-1, max-image-preview:large, max-video-preview:-1\" \/>\n<link rel=\"canonical\" href=\"https:\/\/www.silicloud.com\/zh\/blog\/\u4f7f\u7528python\u8fde\u63a5sensortag\u3001kafka\u548cspark-streaming\u8fdb\u884c\u6d41\u5904\u7406-\u7b2c5\u90e8\u5206\uff1a\u901a\u8fc7apache-t\/\" \/>\n<meta property=\"og:locale\" content=\"zh_CN\" \/>\n<meta property=\"og:type\" content=\"article\" \/>\n<meta property=\"og:title\" content=\"\u4f7f\u7528Python\u8fde\u63a5SensorTag\u3001Kafka\u548cSpark Streaming\u8fdb\u884c\u6d41\u5904\u7406 - \u7b2c5\u90e8\u5206\uff1a\u901a\u8fc7Apache Toree\u5728Jupyter\u4e0a\u8fde\u63a5Spark\" \/>\n<meta property=\"og:description\" content=\"\u6211\u51c6\u5907\u914d\u7f6e\u4e00\u4e2aSpark\u96c6\u7fa4\uff0c\u5e76\u7f16\u5199\u4e00\u4e9b\u793a\u4f8b\u4ee3\u7801\u3002\u6211\u8ba4\u4e3a\u5f88\u591a\u4eba\u90fd\u5c06Jupyter\u4f5c\u4e3aPython\u6570\u636e\u5206\u6790\u548c\u673a\u5668 [&hellip;]\" \/>\n<meta property=\"og:url\" content=\"https:\/\/www.silicloud.com\/zh\/blog\/\u4f7f\u7528python\u8fde\u63a5sensortag\u3001kafka\u548cspark-streaming\u8fdb\u884c\u6d41\u5904\u7406-\u7b2c5\u90e8\u5206\uff1a\u901a\u8fc7apache-t\/\" \/>\n<meta property=\"og:site_name\" content=\"Blog - Silicon Cloud\" \/>\n<meta property=\"article:published_time\" content=\"2022-11-14T02:26:09+00:00\" \/>\n<meta property=\"article:modified_time\" content=\"2024-05-03T19:48:28+00:00\" \/>\n<meta name=\"author\" content=\"\u6587, \u7fd4\" \/>\n<meta name=\"twitter:card\" content=\"summary_large_image\" \/>\n<meta name=\"twitter:label1\" content=\"\u4f5c\u8005\" \/>\n\t<meta name=\"twitter:data1\" content=\"\u6587, \u7fd4\" \/>\n\t<meta name=\"twitter:label2\" content=\"\u9884\u8ba1\u9605\u8bfb\u65f6\u95f4\" \/>\n\t<meta name=\"twitter:data2\" content=\"6 \u5206\" \/>\n<script type=\"application\/ld+json\" 
class=\"yoast-schema-graph\">{\"@context\":\"https:\/\/schema.org\",\"@graph\":[{\"@type\":\"WebPage\",\"@id\":\"https:\/\/www.silicloud.com\/zh\/blog\/%e4%bd%bf%e7%94%a8python%e8%bf%9e%e6%8e%a5sensortag%e3%80%81kafka%e5%92%8cspark-streaming%e8%bf%9b%e8%a1%8c%e6%b5%81%e5%a4%84%e7%90%86-%e7%ac%ac5%e9%83%a8%e5%88%86%ef%bc%9a%e9%80%9a%e8%bf%87apache-t\/\",\"url\":\"https:\/\/www.silicloud.com\/zh\/blog\/%e4%bd%bf%e7%94%a8python%e8%bf%9e%e6%8e%a5sensortag%e3%80%81kafka%e5%92%8cspark-streaming%e8%bf%9b%e8%a1%8c%e6%b5%81%e5%a4%84%e7%90%86-%e7%ac%ac5%e9%83%a8%e5%88%86%ef%bc%9a%e9%80%9a%e8%bf%87apache-t\/\",\"name\":\"\u4f7f\u7528Python\u8fde\u63a5SensorTag\u3001Kafka\u548cSpark Streaming\u8fdb\u884c\u6d41\u5904\u7406 - \u7b2c5\u90e8\u5206\uff1a\u901a\u8fc7Apache Toree\u5728Jupyter\u4e0a\u8fde\u63a5Spark - Blog - Silicon Cloud\",\"isPartOf\":{\"@id\":\"https:\/\/www.silicloud.com\/zh\/blog\/#website\"},\"datePublished\":\"2022-11-14T02:26:09+00:00\",\"dateModified\":\"2024-05-03T19:48:28+00:00\",\"author\":{\"@id\":\"https:\/\/www.silicloud.com\/zh\/blog\/#\/schema\/person\/64d5cc7727fffbff2f9a2a8da1de3e5c\"},\"breadcrumb\":{\"@id\":\"https:\/\/www.silicloud.com\/zh\/blog\/%e4%bd%bf%e7%94%a8python%e8%bf%9e%e6%8e%a5sensortag%e3%80%81kafka%e5%92%8cspark-streaming%e8%bf%9b%e8%a1%8c%e6%b5%81%e5%a4%84%e7%90%86-%e7%ac%ac5%e9%83%a8%e5%88%86%ef%bc%9a%e9%80%9a%e8%bf%87apache-t\/#breadcrumb\"},\"inLanguage\":\"zh-Hans\",\"potentialAction\":[{\"@type\":\"ReadAction\",\"target\":[\"https:\/\/www.silicloud.com\/zh\/blog\/%e4%bd%bf%e7%94%a8python%e8%bf%9e%e6%8e%a5sensortag%e3%80%81kafka%e5%92%8cspark-streaming%e8%bf%9b%e8%a1%8c%e6%b5%81%e5%a4%84%e7%90%86-%e7%ac%ac5%e9%83%a8%e5%88%86%ef%bc%9a%e9%80%9a%e8%bf%87apache-t\/\"]}]},{\"@type\":\"BreadcrumbList\",\"@id\":\"https:\/\/www.silicloud.com\/zh\/blog\/%e4%bd%bf%e7%94%a8python%e8%bf%9e%e6%8e%a5sensortag%e3%80%81kafka%e5%92%8cspark-streaming%e8%bf%9b%e8%a1%8c%e6%b5%81%e5%a4%84%e7%90%86-%e7%ac%ac5%e9%83%a8%e5%88%86%ef%bc%9a%e9%80%9a%e8%bf%
87apache-t\/#breadcrumb\",\"itemListElement\":[{\"@type\":\"ListItem\",\"position\":1,\"name\":\"\u9996\u9875\",\"item\":\"https:\/\/www.silicloud.com\/zh\/blog\/\"},{\"@type\":\"ListItem\",\"position\":2,\"name\":\"\u4f7f\u7528Python\u8fde\u63a5SensorTag\u3001Kafka\u548cSpark Streaming\u8fdb\u884c\u6d41\u5904\u7406 &#8211; \u7b2c5\u90e8\u5206\uff1a\u901a\u8fc7Apache Toree\u5728Jupyter\u4e0a\u8fde\u63a5Spark\"}]},{\"@type\":\"WebSite\",\"@id\":\"https:\/\/www.silicloud.com\/zh\/blog\/#website\",\"url\":\"https:\/\/www.silicloud.com\/zh\/blog\/\",\"name\":\"Blog - Silicon Cloud\",\"description\":\"\",\"inLanguage\":\"zh-Hans\"},{\"@type\":\"Person\",\"@id\":\"https:\/\/www.silicloud.com\/zh\/blog\/#\/schema\/person\/64d5cc7727fffbff2f9a2a8da1de3e5c\",\"name\":\"\u6587, \u7fd4\",\"image\":{\"@type\":\"ImageObject\",\"inLanguage\":\"zh-Hans\",\"@id\":\"https:\/\/www.silicloud.com\/zh\/blog\/#\/schema\/person\/image\/\",\"url\":\"https:\/\/secure.gravatar.com\/avatar\/920c3d673e0bccacc98e5e6b7149bb3c22edd8d39cb753e5d7d7e471498118a1?s=96&d=mm&r=g\",\"contentUrl\":\"https:\/\/secure.gravatar.com\/avatar\/920c3d673e0bccacc98e5e6b7149bb3c22edd8d39cb753e5d7d7e471498118a1?s=96&d=mm&r=g\",\"caption\":\"\u6587, \u7fd4\"},\"url\":\"https:\/\/www.silicloud.com\/zh\/blog\/author\/wenxiang\/\"},{\"@type\":\"ImageObject\",\"inLanguage\":\"zh-Hans\",\"@id\":\"https:\/\/www.silicloud.com\/zh\/blog\/%e4%bd%bf%e7%94%a8python%e8%bf%9e%e6%8e%a5sensortag%e3%80%81kafka%e5%92%8cspark-streaming%e8%bf%9b%e8%a1%8c%e6%b5%81%e5%a4%84%e7%90%86-%e7%ac%ac5%e9%83%a8%e5%88%86%ef%bc%9a%e9%80%9a%e8%bf%87apache-t\/#local-main-organization-logo\",\"url\":\"\",\"contentUrl\":\"\",\"caption\":\"Blog - Silicon Cloud\"}]}<\/script>\n<!-- \/ Yoast SEO Premium plugin. 
-->","yoast_head_json":{"title":"\u4f7f\u7528Python\u8fde\u63a5SensorTag\u3001Kafka\u548cSpark Streaming\u8fdb\u884c\u6d41\u5904\u7406 - \u7b2c5\u90e8\u5206\uff1a\u901a\u8fc7Apache Toree\u5728Jupyter\u4e0a\u8fde\u63a5Spark - Blog - Silicon Cloud","robots":{"index":"index","follow":"follow","max-snippet":"max-snippet:-1","max-image-preview":"max-image-preview:large","max-video-preview":"max-video-preview:-1"},"canonical":"https:\/\/www.silicloud.com\/zh\/blog\/\u4f7f\u7528python\u8fde\u63a5sensortag\u3001kafka\u548cspark-streaming\u8fdb\u884c\u6d41\u5904\u7406-\u7b2c5\u90e8\u5206\uff1a\u901a\u8fc7apache-t\/","og_locale":"zh_CN","og_type":"article","og_title":"\u4f7f\u7528Python\u8fde\u63a5SensorTag\u3001Kafka\u548cSpark Streaming\u8fdb\u884c\u6d41\u5904\u7406 - \u7b2c5\u90e8\u5206\uff1a\u901a\u8fc7Apache Toree\u5728Jupyter\u4e0a\u8fde\u63a5Spark","og_description":"\u6211\u51c6\u5907\u914d\u7f6e\u4e00\u4e2aSpark\u96c6\u7fa4\uff0c\u5e76\u7f16\u5199\u4e00\u4e9b\u793a\u4f8b\u4ee3\u7801\u3002\u6211\u8ba4\u4e3a\u5f88\u591a\u4eba\u90fd\u5c06Jupyter\u4f5c\u4e3aPython\u6570\u636e\u5206\u6790\u548c\u673a\u5668 [&hellip;]","og_url":"https:\/\/www.silicloud.com\/zh\/blog\/\u4f7f\u7528python\u8fde\u63a5sensortag\u3001kafka\u548cspark-streaming\u8fdb\u884c\u6d41\u5904\u7406-\u7b2c5\u90e8\u5206\uff1a\u901a\u8fc7apache-t\/","og_site_name":"Blog - Silicon Cloud","article_published_time":"2022-11-14T02:26:09+00:00","article_modified_time":"2024-05-03T19:48:28+00:00","author":"\u6587, \u7fd4","twitter_card":"summary_large_image","twitter_misc":{"\u4f5c\u8005":"\u6587, \u7fd4","\u9884\u8ba1\u9605\u8bfb\u65f6\u95f4":"6 
\u5206"},"schema":{"@context":"https:\/\/schema.org","@graph":[{"@type":"WebPage","@id":"https:\/\/www.silicloud.com\/zh\/blog\/%e4%bd%bf%e7%94%a8python%e8%bf%9e%e6%8e%a5sensortag%e3%80%81kafka%e5%92%8cspark-streaming%e8%bf%9b%e8%a1%8c%e6%b5%81%e5%a4%84%e7%90%86-%e7%ac%ac5%e9%83%a8%e5%88%86%ef%bc%9a%e9%80%9a%e8%bf%87apache-t\/","url":"https:\/\/www.silicloud.com\/zh\/blog\/%e4%bd%bf%e7%94%a8python%e8%bf%9e%e6%8e%a5sensortag%e3%80%81kafka%e5%92%8cspark-streaming%e8%bf%9b%e8%a1%8c%e6%b5%81%e5%a4%84%e7%90%86-%e7%ac%ac5%e9%83%a8%e5%88%86%ef%bc%9a%e9%80%9a%e8%bf%87apache-t\/","name":"\u4f7f\u7528Python\u8fde\u63a5SensorTag\u3001Kafka\u548cSpark Streaming\u8fdb\u884c\u6d41\u5904\u7406 - \u7b2c5\u90e8\u5206\uff1a\u901a\u8fc7Apache Toree\u5728Jupyter\u4e0a\u8fde\u63a5Spark - Blog - Silicon Cloud","isPartOf":{"@id":"https:\/\/www.silicloud.com\/zh\/blog\/#website"},"datePublished":"2022-11-14T02:26:09+00:00","dateModified":"2024-05-03T19:48:28+00:00","author":{"@id":"https:\/\/www.silicloud.com\/zh\/blog\/#\/schema\/person\/64d5cc7727fffbff2f9a2a8da1de3e5c"},"breadcrumb":{"@id":"https:\/\/www.silicloud.com\/zh\/blog\/%e4%bd%bf%e7%94%a8python%e8%bf%9e%e6%8e%a5sensortag%e3%80%81kafka%e5%92%8cspark-streaming%e8%bf%9b%e8%a1%8c%e6%b5%81%e5%a4%84%e7%90%86-%e7%ac%ac5%e9%83%a8%e5%88%86%ef%bc%9a%e9%80%9a%e8%bf%87apache-t\/#breadcrumb"},"inLanguage":"zh-Hans","potentialAction":[{"@type":"ReadAction","target":["https:\/\/www.silicloud.com\/zh\/blog\/%e4%bd%bf%e7%94%a8python%e8%bf%9e%e6%8e%a5sensortag%e3%80%81kafka%e5%92%8cspark-streaming%e8%bf%9b%e8%a1%8c%e6%b5%81%e5%a4%84%e7%90%86-%e7%ac%ac5%e9%83%a8%e5%88%86%ef%bc%9a%e9%80%9a%e8%bf%87apache-t\/"]}]},{"@type":"BreadcrumbList","@id":"https:\/\/www.silicloud.com\/zh\/blog\/%e4%bd%bf%e7%94%a8python%e8%bf%9e%e6%8e%a5sensortag%e3%80%81kafka%e5%92%8cspark-streaming%e8%bf%9b%e8%a1%8c%e6%b5%81%e5%a4%84%e7%90%86-%e7%ac%ac5%e9%83%a8%e5%88%86%ef%bc%9a%e9%80%9a%e8%bf%87apache-t\/#breadcrumb","itemListElement":[{"@type":"ListItem","position":1,"na
me":"\u9996\u9875","item":"https:\/\/www.silicloud.com\/zh\/blog\/"},{"@type":"ListItem","position":2,"name":"\u4f7f\u7528Python\u8fde\u63a5SensorTag\u3001Kafka\u548cSpark Streaming\u8fdb\u884c\u6d41\u5904\u7406 &#8211; \u7b2c5\u90e8\u5206\uff1a\u901a\u8fc7Apache Toree\u5728Jupyter\u4e0a\u8fde\u63a5Spark"}]},{"@type":"WebSite","@id":"https:\/\/www.silicloud.com\/zh\/blog\/#website","url":"https:\/\/www.silicloud.com\/zh\/blog\/","name":"Blog - Silicon Cloud","description":"","inLanguage":"zh-Hans"},{"@type":"Person","@id":"https:\/\/www.silicloud.com\/zh\/blog\/#\/schema\/person\/64d5cc7727fffbff2f9a2a8da1de3e5c","name":"\u6587, \u7fd4","image":{"@type":"ImageObject","inLanguage":"zh-Hans","@id":"https:\/\/www.silicloud.com\/zh\/blog\/#\/schema\/person\/image\/","url":"https:\/\/secure.gravatar.com\/avatar\/920c3d673e0bccacc98e5e6b7149bb3c22edd8d39cb753e5d7d7e471498118a1?s=96&d=mm&r=g","contentUrl":"https:\/\/secure.gravatar.com\/avatar\/920c3d673e0bccacc98e5e6b7149bb3c22edd8d39cb753e5d7d7e471498118a1?s=96&d=mm&r=g","caption":"\u6587, \u7fd4"},"url":"https:\/\/www.silicloud.com\/zh\/blog\/author\/wenxiang\/"},{"@type":"ImageObject","inLanguage":"zh-Hans","@id":"https:\/\/www.silicloud.com\/zh\/blog\/%e4%bd%bf%e7%94%a8python%e8%bf%9e%e6%8e%a5sensortag%e3%80%81kafka%e5%92%8cspark-streaming%e8%bf%9b%e8%a1%8c%e6%b5%81%e5%a4%84%e7%90%86-%e7%ac%ac5%e9%83%a8%e5%88%86%ef%bc%9a%e9%80%9a%e8%bf%87apache-t\/#local-main-organization-logo","url":"","contentUrl":"","caption":"Blog - Silicon 
Cloud"}]}},"_links":{"self":[{"href":"https:\/\/www.silicloud.com\/zh\/blog\/wp-json\/wp\/v2\/posts\/47297","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/www.silicloud.com\/zh\/blog\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/www.silicloud.com\/zh\/blog\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/www.silicloud.com\/zh\/blog\/wp-json\/wp\/v2\/users\/6"}],"replies":[{"embeddable":true,"href":"https:\/\/www.silicloud.com\/zh\/blog\/wp-json\/wp\/v2\/comments?post=47297"}],"version-history":[{"count":2,"href":"https:\/\/www.silicloud.com\/zh\/blog\/wp-json\/wp\/v2\/posts\/47297\/revisions"}],"predecessor-version":[{"id":95935,"href":"https:\/\/www.silicloud.com\/zh\/blog\/wp-json\/wp\/v2\/posts\/47297\/revisions\/95935"}],"wp:attachment":[{"href":"https:\/\/www.silicloud.com\/zh\/blog\/wp-json\/wp\/v2\/media?parent=47297"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/www.silicloud.com\/zh\/blog\/wp-json\/wp\/v2\/categories?post=47297"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/www.silicloud.com\/zh\/blog\/wp-json\/wp\/v2\/tags?post=47297"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}