{"id":46906,"date":"2023-09-21T07:21:18","date_gmt":"2023-04-22T15:47:38","guid":{"rendered":"https:\/\/www.silicloud.com\/zh\/blog\/%e4%bd%9c%e4%b8%ba%e4%b8%80%e5%90%8d%e5%9f%ba%e7%a1%80%e8%ae%be%e6%96%bd%e5%b7%a5%e7%a8%8b%e5%b8%88%ef%bc%8c%e6%88%91%e5%b0%9d%e8%af%95%e4%bd%bf%e7%94%a8spark-streaming%e5%92%8ckafka%e6%9d%a5%e8%81%9a\/"},"modified":"2024-05-04T00:50:20","modified_gmt":"2024-05-03T16:50:20","slug":"%e4%bd%9c%e4%b8%ba%e4%b8%80%e5%90%8d%e5%9f%ba%e7%a1%80%e8%ae%be%e6%96%bd%e5%b7%a5%e7%a8%8b%e5%b8%88%ef%bc%8c%e6%88%91%e5%b0%9d%e8%af%95%e4%bd%bf%e7%94%a8spark-streaming%e5%92%8ckafka%e6%9d%a5%e8%81%9a","status":"publish","type":"post","link":"https:\/\/www.silicloud.com\/zh\/blog\/%e4%bd%9c%e4%b8%ba%e4%b8%80%e5%90%8d%e5%9f%ba%e7%a1%80%e8%ae%be%e6%96%bd%e5%b7%a5%e7%a8%8b%e5%b8%88%ef%bc%8c%e6%88%91%e5%b0%9d%e8%af%95%e4%bd%bf%e7%94%a8spark-streaming%e5%92%8ckafka%e6%9d%a5%e8%81%9a\/","title":{"rendered":"\u4f5c\u4e3a\u4e00\u540d\u57fa\u7840\u8bbe\u65bd\u5de5\u7a0b\u5e08\uff0c\u6211\u5c1d\u8bd5\u4f7f\u7528Spark Streaming\u548cKafka\u6765\u805a\u5408\u8bbf\u95ee\u65e5\u5fd7"},"content":{"rendered":"<ul class=\"post-ul\">\n<li style=\"list-style-type: none;\">\n<ul class=\"post-ul\">\u30a4\u30f3\u30d5\u30e9\u30a8\u30f3\u30b8\u30cb\u30a2\u3063\u307d\u304fNginx\u306e\u30a2\u30af\u30bb\u30b9\u30ed\u30b0\u3092\u30ea\u30a2\u30eb\u30bf\u30a4\u30e0\u96c6\u8a08\u3057\u3066\u307f\u308b<\/ul>\n<\/li>\n<\/ul>\n<p>&nbsp;<\/p>\n<ul class=\"post-ul\">\n<li style=\"list-style-type: none;\">\n<ul class=\"post-ul\">Sample\u304cScala\u304c\u591a\u3044\u306e\u3067Scala\u3067\u66f8\u3044\u3066\u307f\u305f(\u521dScala)<\/ul>\n<\/li>\n<\/ul>\n<p>&nbsp;<\/p>\n<ul class=\"post-ul\">\n<li style=\"list-style-type: none;\">\n<ul class=\"post-ul\">\u306a\u3046\u306a\u611f\u3058\u3067Nginx=&gt;Fluent=&gt;Kafka=&gt;SparkStreaming<\/ul>\n<\/li>\n<\/ul>\n<p>&nbsp;<\/p>\n<ul class=\"post-ul\">\n<li style=\"list-style-type: none;\">\n<ul 
class=\"post-ul\">Scala\u6c5a\u3044\u306e\u306f\u3086\u308b\u3057\u3066\u306d<\/ul>\n<\/li>\n<\/ul>\n<p>&nbsp;<\/p>\n<ul class=\"post-ul\">\n<li style=\"list-style-type: none;\">\n<ul class=\"post-ul\">\u57fa\u672c\u7684\u306bWordCount\u306esample\u3092\u3054\u306b\u3087\u3054\u306b\u3087\u3057\u305f\u3060\u3051<\/ul>\n<\/li>\n<\/ul>\n<p>&nbsp;<\/p>\n<ul class=\"post-ul\">\u3068\u308a\u3042\u3048\u305a\u96c6\u8a08\u3057\u3066\u307f\u308b<\/ul>\n<h1>\u793a\u4f8b\u65e5\u5fd7<\/h1>\n<p>\u6211\u4f7f\u7528\u8fd9\u4e2a\u6765\u51c6\u5907\u4e86LTSV\u683c\u5f0f\u7684\u65e5\u5fd7\uff0c\u53ea\u4fee\u6539\u4e86\u6700\u540e\u4e00\u884c\u3002<\/p>\n<pre class=\"post-pre\"><code><span class=\"nb\">puts<\/span> <span class=\"s2\">\"time:<\/span><span class=\"si\">#{<\/span><span class=\"no\">Time<\/span><span class=\"p\">.<\/span><span class=\"nf\">at<\/span><span class=\"p\">(<\/span><span class=\"n\">now<\/span><span class=\"p\">).<\/span><span class=\"nf\">strftime<\/span><span class=\"p\">(<\/span><span class=\"s1\">'%d\/%b\/%Y:%H:%M:%S %z'<\/span><span class=\"p\">)<\/span><span class=\"si\">}<\/span><span class=\"se\">\\t<\/span><span class=\"s2\">host:<\/span><span class=\"si\">#{<\/span><span class=\"n\">record<\/span><span class=\"p\">[<\/span><span class=\"s1\">'host'<\/span><span class=\"p\">]<\/span><span class=\"si\">}<\/span><span class=\"se\">\\t<\/span><span class=\"s2\">forwardedfor:<\/span><span class=\"si\">#{<\/span><span class=\"n\">record<\/span><span class=\"p\">[<\/span><span class=\"s1\">'host'<\/span><span class=\"p\">]<\/span><span class=\"si\">}<\/span><span class=\"se\">\\t<\/span><span class=\"s2\">req:<\/span><span class=\"si\">#{<\/span><span class=\"n\">record<\/span><span class=\"p\">[<\/span><span class=\"s1\">'method'<\/span><span class=\"p\">]<\/span><span class=\"si\">}<\/span> <span class=\"si\">#{<\/span><span class=\"n\">record<\/span><span class=\"p\">[<\/span><span class=\"s1\">'path'<\/span><span class=\"p\">]<\/span><span 
class=\"si\">}<\/span><span class=\"s2\"> HTTP\/1.1<\/span><span class=\"se\">\\t<\/span><span class=\"s2\">status:<\/span><span class=\"si\">#{<\/span><span class=\"n\">record<\/span><span class=\"p\">[<\/span><span class=\"s1\">'code'<\/span><span class=\"p\">]<\/span><span class=\"si\">}<\/span><span class=\"se\">\\t<\/span><span class=\"s2\">size:<\/span><span class=\"si\">#{<\/span><span class=\"n\">record<\/span><span class=\"p\">[<\/span><span class=\"s1\">'size'<\/span><span class=\"p\">]<\/span><span class=\"si\">}<\/span><span class=\"se\">\\t<\/span><span class=\"s2\">referer:<\/span><span class=\"si\">#{<\/span><span class=\"n\">record<\/span><span class=\"p\">[<\/span><span class=\"s1\">'referer'<\/span><span class=\"p\">]<\/span><span class=\"si\">}<\/span><span class=\"se\">\\t<\/span><span class=\"s2\">ua:<\/span><span class=\"si\">#{<\/span><span class=\"n\">record<\/span><span class=\"p\">[<\/span><span class=\"s1\">'agent'<\/span><span class=\"p\">]<\/span><span class=\"si\">}<\/span><span class=\"s2\">\"<\/span>\r\n<\/code><\/pre>\n<pre class=\"post-pre\"><code>time:22\/Dec\/2016:18:07:56 +0900 host:164.81.181.112     forwardedfor:164.81.181.112     req:GET \/category\/office HTTP\/1.1      status:200      size:124        referer:\/item\/games\/3481        ua:Mozilla\/5.0 (iPhone; CPU iPhone OS 5_0_1 like Mac OS X) AppleWebKit\/534.46 (KHTML, like Gecko) Version\/5.1 Mobile\/9A405 Safari\/7534.48.3\r\ntime:22\/Dec\/2016:18:07:59 +0900 host:196.93.44.211      forwardedfor:196.93.44.211      req:GET \/category\/electronics?from=10 HTTP\/1.1 status:200      size:136        referer:\/category\/electronics   ua:Mozilla\/5.0 (Windows NT 6.1; WOW64; rv:10.0.1) Gecko\/20100101 Firefox\/10.0.1\r\ntime:22\/Dec\/2016:18:08:02 +0900 host:20.171.223.57      forwardedfor:20.171.223.57      req:GET \/category\/finance HTTP\/1.1     status:200      size:78 referer:\/category\/office        ua:Mozilla\/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; 
Trident\/5.0)\r\n<\/code><\/pre>\n<h1>Fluent=&gt;Kafka\u7ec4\u4ef6<\/h1>\n<p>\u4f7f\u7528 fluent-plugin-kafka \u5c06\u65e5\u5fd7\u53d1\u9001\u5230 Kafka\u3002<\/p>\n<ul class=\"post-ul\">\n<li style=\"list-style-type: none;\">\n<ul class=\"post-ul\">\u5168\u30e1\u30c3\u30bb\u30fc\u30b8\u3092String\u3068\u3057\u3066\u9001\u308a\u8fbc\u3080<\/ul>\n<\/li>\n<\/ul>\n<p>&nbsp;<\/p>\n<ul class=\"post-ul\">parse\u306fFluent\u3067\u306f\u306a\u304fSpark\u3067\u3084\u3089\u305b\u308b(\u3069\u3063\u3061\u304c\u3044\u3044\u304b\u306f\u77e5\u3089\u306a\u3044\u3051\u3069Scala\u306e\u65b9\u304c\u65e9\u3044\u30a4\u30e1\u30fc\u30b8\uff0b\u52c9\u5f37\u306e\u305f\u3081)<\/ul>\n<p>\u4f7f\u7528 &#8220;td-agent-gem install fluent-plugin-kafka&#8221; \u547d\u4ee4\u5b89\u88c5 fluent-plugin-kafka\uff0c\u5e76\u5c06 td-agent.conf \u6587\u4ef6\u653e\u7f6e\u5728\u76f8\u5e94\u4f4d\u7f6e\u3002<\/p>\n<pre class=\"post-pre\"><code>&lt;match **&gt;\r\n  @type kafka\r\n  brokers 10.0.0.65:9092\r\n  zookeeper 10.0.0.65:2181\r\n  default_topic nginx\r\n&lt;\/match&gt;\r\n<\/code><\/pre>\n<h1>Kafka\u7684\u51c6\u5907<\/h1>\n<h3>\u8bf7\u4e0b\u8f7d\u5e76\u5b89\u88c5\u3002<\/h3>\n<pre class=\"post-pre\"><code><span class=\"nv\">$ <\/span><span class=\"nb\">cd<\/span> \/opt\r\n<span class=\"nv\">$ <\/span>wget https:\/\/www.apache.org\/dyn\/closer.cgi?path<span class=\"o\">=<\/span>\/kafka\/0.10.1.0\/kafka_2.10-0.10.1.0.tgz\r\n<span class=\"nv\">$ <\/span><span class=\"nb\">tar <\/span>xfvz kafka_2.10-0.10.1.0.tgz\r\n<span class=\"nv\">$ <\/span><span class=\"nb\">ln<\/span> <span class=\"nt\">-s<\/span> kafka_2.10-0.10.1.0 kafka\r\n<span class=\"nv\">$ <\/span><span class=\"nb\">cd <\/span>kafka\r\n<\/code><\/pre>\n<h2>\u666e\u901a\u5730\u542f\u52a8Zookeeper\u3002<\/h2>\n<pre class=\"post-pre\"><code><span class=\"nv\">$ <\/span>.\/bin\/zookeeper-server-start.sh <span class=\"nt\">-daemon<\/span> config\/zookeeper.properties \r\n<span class=\"nv\">$ <\/span>jps <span class=\"nt\">-v<\/span> 
|grep zookeeper\r\n3839 QuorumPeerMain <span class=\"nt\">-Xmx512M<\/span> <span class=\"nt\">-Xms512M<\/span> <span class=\"nt\">-XX<\/span>:+UseG1GC <span class=\"nt\">-XX<\/span>:MaxGCPauseMillis<span class=\"o\">=<\/span>20 <span class=\"nt\">-XX<\/span>:InitiatingHeapOccupancyPercent<span class=\"o\">=<\/span>35 <span class=\"nt\">-XX<\/span>:+DisableExplicitGC <span class=\"nt\">-Djava<\/span>.awt.headless<span class=\"o\">=<\/span><span class=\"nb\">true<\/span> <span class=\"nt\">-Xloggc<\/span>:\/opt\/kafka\/bin\/..\/logs\/zookeeper-gc.log <span class=\"nt\">-verbose<\/span>:gc <span class=\"nt\">-XX<\/span>:+PrintGCDetails <span class=\"nt\">-XX<\/span>:+PrintGCDateStamps <span class=\"nt\">-XX<\/span>:+PrintGCTimeStamps <span class=\"nt\">-Dcom<\/span>.sun.management.jmxremote <span class=\"nt\">-Dcom<\/span>.sun.management.jmxremote.authenticate<span class=\"o\">=<\/span><span class=\"nb\">false<\/span> <span class=\"nt\">-Dcom<\/span>.sun.management.jmxremote.ssl<span class=\"o\">=<\/span><span class=\"nb\">false<\/span> <span class=\"nt\">-Dkafka<\/span>.logs.dir<span class=\"o\">=<\/span>\/opt\/kafka\/bin\/..\/logs <span class=\"nt\">-Dlog4j<\/span>.configuration<span class=\"o\">=<\/span>file:.\/bin\/..\/config\/log4j.properties\r\n<\/code><\/pre>\n<h2>\u4ee5\u5e38\u89c4\u65b9\u5f0f\u542f\u52a8Kafka<\/h2>\n<pre class=\"post-pre\"><code>$ .\/bin\/kafka-server-start.sh -daemon config\/server.properties\r\n$ jps -v |grep Kafka\r\n28603 Kafka -Xmx1G -Xms1G -XX:+UseG1GC -XX:MaxGCPauseMillis=20 -XX:InitiatingHeapOccupancyPercent=35 -XX:+DisableExplicitGC -Djava.awt.headless=true -Xloggc:\/opt\/kafka\/bin\/..\/logs\/kafkaServer-gc.log -verbose:gc -XX:+PrintGCDetails -XX:+PrintGCDateStamps -XX:+PrintGCTimeStamps -Dcom.sun.management.jmxremote -Dcom.sun.management.jmxremote.authenticate=false -Dcom.sun.management.jmxremote.ssl=false -Dkafka.logs.dir=\/opt\/kafka\/bin\/..\/logs 
-Dlog4j.configuration=file:.\/bin\/..\/config\/log4j.properties\r\n<\/code><\/pre>\n<h2>\u6211\u8bd5\u7740\u53d1\u9001\u4e00\u6761\u6d88\u606f\u3002<\/h2>\n<h3>\u542f\u52a8\u8f93\u51fa\u63a7\u5236\u53f0\u7684Worker<\/h3>\n<pre class=\"post-pre\"><code><span class=\"nv\">$ <\/span>.\/bin\/kafka-console-consumer.sh <span class=\"nt\">--consumer-property<\/span><span class=\"o\">=<\/span>config\/consumer.properties <span class=\"nt\">--zookeeper<\/span> 10.0.0.65:2181 <span class=\"nt\">--topic<\/span> nginx\r\n<\/code><\/pre>\n<h3>\u5c06\u6570\u636e\u6d41\u5165Fluent\u5e76\u67e5\u770b<\/h3>\n<pre class=\"post-pre\"><code><span class=\"nv\">$ <\/span><span class=\"nb\">head <\/span>sample.log| \/opt\/td-agent\/embedded\/bin\/fluent-cat <span class=\"nt\">--none<\/span> data.nginx\r\n<\/code><\/pre>\n<h3>Worker\u7684\u63a7\u5236\u53f0\u4e0a\u6709\u8f93\u51fa\u7684\u8bdd\uff0c\u5c31\u53ef\u4ee5\u4e86\u3002<\/h3>\n<pre class=\"post-pre\"><code><span class=\"o\">{<\/span><span class=\"s2\">\"message\"<\/span>:<span class=\"s2\">\"time:22\/Dec\/2016:18:07:56 +0900<\/span><span class=\"se\">\\t<\/span><span class=\"s2\">host:164.81.181.112<\/span><span class=\"se\">\\t<\/span><span class=\"s2\">forwardedfor:164.81.181.112<\/span><span class=\"se\">\\t<\/span><span class=\"s2\">req:GET \/category\/office HTTP\/1.1<\/span><span class=\"se\">\\t<\/span><span class=\"s2\">status:200<\/span><span class=\"se\">\\t<\/span><span class=\"s2\">size:124<\/span><span class=\"se\">\\t<\/span><span class=\"s2\">referer:\/item\/games\/3481<\/span><span class=\"se\">\\t<\/span><span class=\"s2\">ua:Mozilla\/5.0 (iPhone; CPU iPhone OS 5_0_1 like Mac OS X) AppleWebKit\/534.46 (KHTML, like Gecko) Version\/5.1 Mobile\/9A405 Safari\/7534.48.3\"<\/span>,<span class=\"s2\">\"timestamp\"<\/span>:<span class=\"s2\">\"2016-12-26T11:05:07+0900\"<\/span><span class=\"o\">}<\/span>\r\n<span class=\"o\">{<\/span><span class=\"s2\">\"message\"<\/span>:<span 
class=\"s2\">\"time:22\/Dec\/2016:18:07:59 +0900<\/span><span class=\"se\">\\t<\/span><span class=\"s2\">host:196.93.44.211<\/span><span class=\"se\">\\t<\/span><span class=\"s2\">forwardedfor:196.93.44.211<\/span><span class=\"se\">\\t<\/span><span class=\"s2\">req:GET \/category\/electronics?from=10 HTTP\/1.1<\/span><span class=\"se\">\\t<\/span><span class=\"s2\">status:200<\/span><span class=\"se\">\\t<\/span><span class=\"s2\">size:136<\/span><span class=\"se\">\\t<\/span><span class=\"s2\">referer:\/category\/electronics<\/span><span class=\"se\">\\t<\/span><span class=\"s2\">ua:Mozilla\/5.0 (Windows NT 6.1; WOW64; rv:10.0.1) Gecko\/20100101 Firefox\/10.0.1\"<\/span>,<span class=\"s2\">\"timestamp\"<\/span>:<span class=\"s2\">\"2016-12-26T11:05:07+0900\"<\/span><span class=\"o\">}<\/span>\r\n<\/code><\/pre>\n<h1>Spark\u7684\u51c6\u5907<\/h1>\n<p>\u6211\u5728\u8fd9\u91cc\u4e5f\u5199\u8fc7\u4e86\uff0c\u6240\u4ee5\u7701\u7565\u3002<br \/>\n\u53ea\u662f\u4e0b\u8f7d\u548c\u89e3\u538b\u4e86spark-2.0.0.tgz\u3002<\/p>\n<h1>\u521b\u5efaScala\u73af\u5883\u3002<\/h1>\n<h2>\u5b89\u88c5Scala\u548cSBT\u3002<\/h2>\n<ul class=\"post-ul\">\n<li style=\"list-style-type: none;\">\n<ul class=\"post-ul\">sbt 0.13.12<\/ul>\n<\/li>\n<\/ul>\n<p>&nbsp;<\/p>\n<ul class=\"post-ul\">scala 2.11.6-6<\/ul>\n<p>\u53ef\u4ee5\u901a\u8fc7\u4ee5\u4e0b\u94fe\u63a5\u4e0b\u8f7d `apt` \u5b89\u88c5\u6587\u4ef6\uff1ahttps:\/\/dl.bintray.com\/sbt\/debian<\/p>\n<pre class=\"post-pre\"><code><span class=\"nv\">$ <\/span>apt <span class=\"nb\">install <\/span><span class=\"nv\">scala<\/span><span class=\"o\">=<\/span>2.11.6-6 <span class=\"nv\">sbt<\/span><span class=\"o\">=<\/span>0.13.12\r\n<\/code><\/pre>\n<h2>\u521b\u5efa\u76ee\u5f55<\/h2>\n<pre class=\"post-pre\"><code><span class=\"nv\">$ <\/span><span class=\"nb\">mkdir<\/span> <span class=\"nt\">-p<\/span> sample\/src\/main\/scala\/\r\n<\/code><\/pre>\n<h2>\u521b\u5efa\u4e00\u4e2abuild.sbt\u6587\u4ef6<\/h2>\n<pre class=\"post-pre\"><code><span 
class=\"n\">$<\/span> <span class=\"n\">cat<\/span> <span class=\"n\">sample<\/span><span class=\"o\">\/<\/span><span class=\"nv\">build<\/span><span class=\"o\">.<\/span><span class=\"py\">sbt<\/span> \r\n<span class=\"k\">import<\/span> <span class=\"nn\">scala.util.Properties<\/span>\r\n\r\n<span class=\"n\">name<\/span> <span class=\"o\">:=<\/span> <span class=\"s\">\"Test\"<\/span>\r\n<span class=\"n\">version<\/span> <span class=\"o\">:=<\/span> <span class=\"s\">\"1.0\"<\/span>\r\n<span class=\"n\">scalaVersion<\/span> <span class=\"o\">:=<\/span> <span class=\"s\">\"2.11.6\"<\/span>\r\n\r\n<span class=\"n\">libraryDependencies<\/span> <span class=\"o\">+=<\/span> <span class=\"s\">\"org.apache.spark\"<\/span> <span class=\"o\">%<\/span> <span class=\"s\">\"spark-core_2.11\"<\/span> <span class=\"o\">%<\/span> <span class=\"s\">\"2.0.0\"<\/span>\r\n<span class=\"n\">libraryDependencies<\/span> <span class=\"o\">+=<\/span> <span class=\"s\">\"org.apache.spark\"<\/span> <span class=\"o\">%<\/span> <span class=\"s\">\"spark-streaming_2.11\"<\/span> <span class=\"o\">%<\/span> <span class=\"s\">\"2.0.0\"<\/span>\r\n<span class=\"n\">libraryDependencies<\/span> <span class=\"o\">+=<\/span> <span class=\"s\">\"org.apache.spark\"<\/span> <span class=\"o\">%<\/span> <span class=\"s\">\"spark-streaming-kafka-0-8_2.11\"<\/span> <span class=\"o\">%<\/span> <span class=\"s\">\"2.0.0\"<\/span>\r\n<span class=\"n\">libraryDependencies<\/span> <span class=\"o\">+=<\/span> <span class=\"s\">\"net.liftweb\"<\/span> <span class=\"o\">%<\/span> <span class=\"s\">\"lift-json_2.11\"<\/span> <span class=\"o\">%<\/span> <span class=\"s\">\"3.0.1\"<\/span>\r\n<span class=\"n\">libraryDependencies<\/span> <span class=\"o\">+=<\/span> <span class=\"s\">\"com.github.seratch\"<\/span> <span class=\"o\">%<\/span> <span class=\"s\">\"ltsv4s_2.11\"<\/span> <span class=\"o\">%<\/span> <span class=\"s\">\"1.0.+\"<\/span>\r\n\r\n<span class=\"n\">assemblyMergeStrategy<\/span> <span 
class=\"n\">in<\/span> <span class=\"n\">assembly<\/span> <span class=\"o\">:=<\/span> <span class=\"o\">{<\/span>\r\n  <span class=\"k\">case<\/span> <span class=\"nc\">PathList<\/span><span class=\"o\">(<\/span><span class=\"s\">\"javax\"<\/span><span class=\"o\">,<\/span> <span class=\"s\">\"servlet\"<\/span><span class=\"o\">,<\/span> <span class=\"n\">xs<\/span> <span class=\"k\">@<\/span> <span class=\"k\">_<\/span><span class=\"o\">*)<\/span>         <span class=\"k\">=&gt;<\/span> <span class=\"nv\">MergeStrategy<\/span><span class=\"o\">.<\/span><span class=\"py\">first<\/span>\r\n  <span class=\"k\">case<\/span> <span class=\"nc\">PathList<\/span><span class=\"o\">(<\/span><span class=\"n\">ps<\/span> <span class=\"k\">@<\/span> <span class=\"k\">_<\/span><span class=\"o\">*)<\/span> <span class=\"k\">if<\/span> <span class=\"nv\">ps<\/span><span class=\"o\">.<\/span><span class=\"py\">last<\/span> <span class=\"n\">endsWith<\/span> <span class=\"s\">\".properties\"<\/span> <span class=\"k\">=&gt;<\/span> <span class=\"nv\">MergeStrategy<\/span><span class=\"o\">.<\/span><span class=\"py\">first<\/span>\r\n  <span class=\"k\">case<\/span> <span class=\"nc\">PathList<\/span><span class=\"o\">(<\/span><span class=\"n\">ps<\/span> <span class=\"k\">@<\/span> <span class=\"k\">_<\/span><span class=\"o\">*)<\/span> <span class=\"k\">if<\/span> <span class=\"nv\">ps<\/span><span class=\"o\">.<\/span><span class=\"py\">last<\/span> <span class=\"n\">endsWith<\/span> <span class=\"s\">\".xml\"<\/span> <span class=\"k\">=&gt;<\/span> <span class=\"nv\">MergeStrategy<\/span><span class=\"o\">.<\/span><span class=\"py\">first<\/span>\r\n  <span class=\"k\">case<\/span> <span class=\"nc\">PathList<\/span><span class=\"o\">(<\/span><span class=\"n\">ps<\/span> <span class=\"k\">@<\/span> <span class=\"k\">_<\/span><span class=\"o\">*)<\/span> <span class=\"k\">if<\/span> <span class=\"nv\">ps<\/span><span class=\"o\">.<\/span><span class=\"py\">last<\/span> <span 
class=\"n\">endsWith<\/span> <span class=\"s\">\".types\"<\/span> <span class=\"k\">=&gt;<\/span> <span class=\"nv\">MergeStrategy<\/span><span class=\"o\">.<\/span><span class=\"py\">first<\/span>\r\n  <span class=\"k\">case<\/span> <span class=\"nc\">PathList<\/span><span class=\"o\">(<\/span><span class=\"n\">ps<\/span> <span class=\"k\">@<\/span> <span class=\"k\">_<\/span><span class=\"o\">*)<\/span> <span class=\"k\">if<\/span> <span class=\"nv\">ps<\/span><span class=\"o\">.<\/span><span class=\"py\">last<\/span> <span class=\"n\">endsWith<\/span> <span class=\"s\">\".class\"<\/span> <span class=\"k\">=&gt;<\/span> <span class=\"nv\">MergeStrategy<\/span><span class=\"o\">.<\/span><span class=\"py\">first<\/span>\r\n  <span class=\"k\">case<\/span> <span class=\"s\">\"application.conf\"<\/span>                            <span class=\"k\">=&gt;<\/span> <span class=\"nv\">MergeStrategy<\/span><span class=\"o\">.<\/span><span class=\"py\">concat<\/span>\r\n  <span class=\"k\">case<\/span> <span class=\"s\">\"unwanted.txt\"<\/span>                                <span class=\"k\">=&gt;<\/span> <span class=\"nv\">MergeStrategy<\/span><span class=\"o\">.<\/span><span class=\"py\">discard<\/span>\r\n  <span class=\"k\">case<\/span> <span class=\"n\">x<\/span> <span class=\"k\">=&gt;<\/span>\r\n    <span class=\"k\">val<\/span> <span class=\"nv\">oldStrategy<\/span> <span class=\"k\">=<\/span> <span class=\"o\">(<\/span><span class=\"n\">assemblyMergeStrategy<\/span> <span class=\"n\">in<\/span> <span class=\"n\">assembly<\/span><span class=\"o\">).<\/span><span class=\"py\">value<\/span>\r\n    <span class=\"nf\">oldStrategy<\/span><span class=\"o\">(<\/span><span class=\"n\">x<\/span><span class=\"o\">)<\/span>\r\n<span 
class=\"o\">}<\/span>\r\n<\/code><\/pre>\n<p>\u5173\u4e8eassemblyMergeStrategy\uff0c\u5728\u6267\u884cassembly\u65f6\u9047\u5230\u4e86\u5404\u79cd\u9519\u8bef\uff0c\u6240\u4ee5\u6211\u590d\u5236\u9ecf\u8d34\u4e86\u5b83\u4eec\uff08\u4e0d\u592a\u660e\u767d\uff09\u3002<\/p>\n<h2>\u4e3a\u4e86\u4f7f\u7528assembly\u529f\u80fd\uff0c\u9700\u8981\u6dfb\u52a0\u63d2\u4ef6\u3002<\/h2>\n<pre class=\"post-pre\"><code><span class=\"n\">$<\/span> <span class=\"n\">mkdir<\/span> <span class=\"o\">-<\/span><span class=\"n\">p<\/span> <span class=\"n\">sample<\/span><span class=\"o\">\/<\/span><span class=\"n\">project<\/span><span class=\"o\">\/<\/span>\r\n<span class=\"n\">$<\/span> <span class=\"n\">cat<\/span> <span class=\"n\">sample<\/span><span class=\"o\">\/<\/span><span class=\"n\">project<\/span><span class=\"o\">\/<\/span><span class=\"nv\">plugins<\/span><span class=\"o\">.<\/span><span class=\"py\">sbt<\/span> \r\n<span class=\"n\">resolvers<\/span> <span class=\"o\">+=<\/span> <span class=\"nv\">Resolver<\/span><span class=\"o\">.<\/span><span class=\"py\">url<\/span><span class=\"o\">(<\/span><span class=\"s\">\"artifactory\"<\/span><span class=\"o\">,<\/span> <span class=\"nf\">url<\/span><span class=\"o\">(<\/span><span class=\"s\">\"http:\/\/scalasbt.artifactoryonline.com\/scalasbt\/sbt-plugin-releases\"<\/span><span class=\"o\">))(<\/span><span class=\"nv\">Resolver<\/span><span class=\"o\">.<\/span><span class=\"py\">ivyStylePatterns<\/span><span class=\"o\">)<\/span>\r\n  <span class=\"nf\">addSbtPlugin<\/span><span class=\"o\">(<\/span><span class=\"s\">\"com.eed3si9n\"<\/span> <span class=\"o\">%<\/span> <span class=\"s\">\"sbt-assembly\"<\/span> <span class=\"o\">%<\/span> <span class=\"s\">\"0.14.3\"<\/span><span class=\"o\">)<\/span>\r\n<\/code><\/pre>\n<h2>Kafka\u7684\u4ee3\u7801\u4ec5\u83b7\u53d6\u5e76\u6253\u5370\u3002<\/h2>\n<p>\u6309\u7167\u6837\u672c\u7684\u8981\u6c42<\/p>\n<pre class=\"post-pre\"><code><span class=\"k\">package<\/span> <span 
class=\"nn\">com.test.spark<\/span>\r\n\r\n<span class=\"k\">import<\/span> <span class=\"nn\">java.util.Date<\/span>\r\n<span class=\"k\">import<\/span> <span class=\"nn\">java.util.Calendar<\/span>\r\n<span class=\"k\">import<\/span> <span class=\"nn\">org.apache.kafka.clients.producer.<\/span><span class=\"o\">{<\/span><span class=\"nc\">KafkaProducer<\/span><span class=\"o\">,<\/span> <span class=\"nc\">ProducerConfig<\/span><span class=\"o\">,<\/span> <span class=\"nc\">ProducerRecord<\/span><span class=\"o\">}<\/span>\r\n<span class=\"k\">import<\/span> <span class=\"nn\">org.apache.spark.SparkConf<\/span>\r\n<span class=\"k\">import<\/span> <span class=\"nn\">org.apache.spark.streaming._<\/span>\r\n<span class=\"k\">import<\/span> <span class=\"nn\">org.apache.spark.streaming.kafka._<\/span>\r\n\r\n<span class=\"k\">object<\/span> <span class=\"nc\">KafkaWorker<\/span> <span class=\"o\">{<\/span>\r\n  <span class=\"k\">def<\/span> <span class=\"nf\">main<\/span><span class=\"o\">(<\/span><span class=\"n\">args<\/span><span class=\"k\">:<\/span> <span class=\"kt\">Array<\/span><span class=\"o\">[<\/span><span class=\"kt\">String<\/span><span class=\"o\">])<\/span> <span class=\"o\">{<\/span>\r\n    <span class=\"c1\">\/\/ zkQuorum(127.0.0.1:2181), group(test), topics(nginx), numThreads(2), Sec<\/span>\r\n    <span class=\"nf\">if<\/span> <span class=\"o\">(<\/span><span class=\"nv\">args<\/span><span class=\"o\">.<\/span><span class=\"py\">length<\/span> <span class=\"o\">&lt;<\/span> <span class=\"mi\">5<\/span><span class=\"o\">)<\/span> <span class=\"o\">{<\/span>\r\n      <span class=\"nv\">System<\/span><span class=\"o\">.<\/span><span class=\"py\">exit<\/span><span class=\"o\">(<\/span><span class=\"mi\">1<\/span><span class=\"o\">)<\/span>\r\n    <span class=\"o\">}<\/span>\r\n    <span class=\"k\">val<\/span> <span class=\"nv\">Array<\/span><span class=\"o\">(<\/span><span class=\"n\">zkQuorum<\/span><span class=\"o\">,<\/span> <span 
class=\"n\">group<\/span><span class=\"o\">,<\/span> <span class=\"n\">topics<\/span><span class=\"o\">,<\/span> <span class=\"n\">numThreads<\/span><span class=\"o\">,<\/span> <span class=\"n\">sec<\/span><span class=\"o\">)<\/span> <span class=\"k\">=<\/span> <span class=\"n\">args<\/span>\r\n    <span class=\"k\">val<\/span> <span class=\"nv\">secSleep<\/span>    <span class=\"k\">=<\/span> <span class=\"nv\">sec<\/span><span class=\"o\">.<\/span><span class=\"py\">toInt<\/span>\r\n    <span class=\"k\">val<\/span> <span class=\"nv\">topicMap<\/span>    <span class=\"k\">=<\/span> <span class=\"nv\">topics<\/span><span class=\"o\">.<\/span><span class=\"py\">split<\/span><span class=\"o\">(<\/span><span class=\"s\">\",\"<\/span><span class=\"o\">).<\/span><span class=\"py\">map<\/span><span class=\"o\">((<\/span><span class=\"k\">_<\/span><span class=\"o\">,<\/span> <span class=\"nv\">numThreads<\/span><span class=\"o\">.<\/span><span class=\"py\">toInt<\/span><span class=\"o\">)).<\/span><span class=\"py\">toMap<\/span>\r\n    <span class=\"k\">val<\/span> <span class=\"nv\">sparkConf<\/span>   <span class=\"k\">=<\/span> <span class=\"k\">new<\/span> <span class=\"nc\">SparkConf<\/span><span class=\"o\">().<\/span><span class=\"py\">setAppName<\/span><span class=\"o\">(<\/span><span class=\"s\">\"KafkaWorker\"<\/span><span class=\"o\">)<\/span>\r\n    <span class=\"k\">val<\/span> <span class=\"nv\">ssc<\/span>         <span class=\"k\">=<\/span> <span class=\"k\">new<\/span> <span class=\"nc\">StreamingContext<\/span><span class=\"o\">(<\/span><span class=\"n\">sparkConf<\/span><span class=\"o\">,<\/span> <span class=\"nc\">Seconds<\/span><span class=\"o\">(<\/span><span class=\"n\">secSleep<\/span><span class=\"o\">))<\/span>\r\n    <span class=\"k\">val<\/span> <span class=\"nv\">kafkaStream<\/span> <span class=\"k\">=<\/span> <span class=\"nv\">KafkaUtils<\/span><span class=\"o\">.<\/span><span class=\"py\">createStream<\/span><span 
class=\"o\">(<\/span><span class=\"n\">ssc<\/span><span class=\"o\">,<\/span> <span class=\"n\">zkQuorum<\/span><span class=\"o\">,<\/span> <span class=\"n\">group<\/span><span class=\"o\">,<\/span> <span class=\"n\">topicMap<\/span><span class=\"o\">).<\/span><span class=\"py\">map<\/span><span class=\"o\">(<\/span><span class=\"nv\">_<\/span><span class=\"o\">.<\/span><span class=\"py\">_2<\/span><span class=\"o\">)<\/span>\r\n    <span class=\"nv\">ssc<\/span><span class=\"o\">.<\/span><span class=\"py\">checkpoint<\/span><span class=\"o\">(<\/span><span class=\"s\">\"checkpoint\"<\/span><span class=\"o\">)<\/span>\r\n\r\n    <span class=\"nv\">kafkaStream<\/span><span class=\"o\">.<\/span><span class=\"py\">foreachRDD<\/span><span class=\"o\">{<\/span> <span class=\"n\">rdd<\/span> <span class=\"k\">=&gt;<\/span> \r\n      <span class=\"nf\">println<\/span><span class=\"o\">(<\/span><span class=\"s\">\"### Start %s ###\"<\/span><span class=\"o\">.<\/span><span class=\"py\">format<\/span><span class=\"o\">(<\/span><span class=\"nv\">Calendar<\/span><span class=\"o\">.<\/span><span class=\"py\">getInstance<\/span><span class=\"o\">.<\/span><span class=\"py\">getTime<\/span><span class=\"o\">.<\/span><span class=\"py\">toString<\/span><span class=\"o\">))<\/span>\r\n      <span class=\"nv\">rdd<\/span><span class=\"o\">.<\/span><span class=\"py\">foreach<\/span><span class=\"o\">(<\/span><span class=\"n\">print<\/span><span class=\"o\">)<\/span>\r\n      <span class=\"nf\">println<\/span><span class=\"o\">(<\/span><span class=\"s\">\"### END %s ###\\n\"<\/span><span class=\"o\">.<\/span><span class=\"py\">format<\/span><span class=\"o\">(<\/span><span class=\"nv\">Calendar<\/span><span class=\"o\">.<\/span><span class=\"py\">getInstance<\/span><span class=\"o\">.<\/span><span class=\"py\">getTime<\/span><span class=\"o\">.<\/span><span class=\"py\">toString<\/span><span class=\"o\">))<\/span>\r\n    <span class=\"o\">}<\/span>\r\n\r\n    <span 
class=\"nv\">ssc<\/span><span class=\"o\">.<\/span><span class=\"py\">start<\/span><span class=\"o\">()<\/span>\r\n    <span class=\"nv\">ssc<\/span><span class=\"o\">.<\/span><span class=\"py\">awaitTermination<\/span><span class=\"o\">()<\/span>\r\n  <span class=\"o\">}<\/span>\r\n<span class=\"o\">}<\/span>\r\n<\/code><\/pre>\n<h2>\u7f16\u8bd1<\/h2>\n<pre class=\"post-pre\"><code><span class=\"nv\">$ <\/span><span class=\"nb\">cd <\/span>sample\/\r\n<span class=\"nv\">$ <\/span>sbt assembly <span class=\"c\">#\u5927\u91cf\u306bwarn\u304c\u3067\u308b\u3051\u3069Merging\u306a\u306e\u3067\u6c17\u306b\u3057\u306a\u3044<\/span>\r\n<span class=\"nv\">$ <\/span>ll target\/scala-2.11\/Test-assembly-1.0.jar \r\n<span class=\"nt\">-rw-r--r--<\/span> 1 root root 112868840 Dec 26 11:24 target\/scala-2.11\/Test-assembly-1.0.jar\r\n<\/code><\/pre>\n<h2>\u7528Spark\u6267\u884cJar\u6587\u4ef6<\/h2>\n<p>\u53c2\u6570\u6309\u7167\u6e90\u4ee3\u7801\u7684\u8981\u6c42\u4ee5 zkQuorum(127.0.0.1:2181)\u3001group(test)\u3001topics(nginx)\u3001numThreads(2)\u3001Sec \u7684\u987a\u5e8f\u8fdb\u884c\u8bbe\u7f6e\u3002<\/p>\n<pre class=\"post-pre\"><code><span class=\"nv\">$ <\/span><span class=\"nb\">cd<\/span> \/opt\/spark\r\n<span class=\"nv\">$ <\/span>.\/bin\/spark-submit <span class=\"nt\">--class<\/span> com.test.spark.KafkaWorker  \/root\/sample\/target\/scala-2.11\/Test-assembly-1.0.jar 127.0.0.1:2181 <span class=\"nb\">test <\/span>nginx 2 5\r\n<span class=\"c\">### Start Mon Dec 26 11:31:05 JST 2016 ###<\/span>\r\n<span class=\"c\">### END Mon Dec 26 11:31:05 JST 2016 ###<\/span>\r\n\r\n<span class=\"c\">### Start Mon Dec 26 11:31:10 JST 2016 ###<\/span>\r\n<span class=\"c\">### END Mon Dec 26 11:31:10 JST 2016 ###<\/span>\r\n<\/code><\/pre>\n<p>\u786e\u8ba4\u4e86\u5927\u6982\u6bcf5\u79d2\u6267\u884c\u4e00\u6b21\u7684\u5904\u7406\u3002<\/p>\n<h2>\u5c06\u6570\u636e\u8f93\u5165\u5e76\u89c2\u5bdf<\/h2>\n<pre class=\"post-pre\"><code><span class=\"nv\">$ <\/span><span class=\"nb\">head 
<\/span>sample.log| \/opt\/td-agent\/embedded\/bin\/fluent-cat <span class=\"nt\">--none<\/span> data.nginx\r\n<\/code><\/pre>\n<pre class=\"post-pre\"><code>### Start Mon Dec 26 11:32:20 JST 2016 ###\r\n{\"message\":\"time:22\/Dec\/2016:18:07:56 +0900\\thost:164.81.181.112\\tforwardedfor:164.81.181.112\\treq:GET \/category\/office HTTP\/1.1\\tstatus:200\\tsize:124\\treferer:\/item\/games\/3481\\tua:Mozilla\/5.0 (iPhone; CPU iPhone OS 5_0_1 like Mac OS X) AppleWebKit\/534.46 (KHTML, like Gecko) Version\/5.1 Mobile\/9A405 Safari\/7534.48.3\",\"timestamp\":\"2016-12-26T11:32:16+0900\"}{\"message\":\"time:22\/Dec\/2016:18:07:59 +0900\\thost:196.93.44.211\\tforwardedfor:196.93.44.211\\treq:GET \/category\/electronics?from=10 HTTP\/1.1\\tstatus:200\\tsize:136\\treferer:\/category\/electronics\\tua:Mozilla\/5.0 (Windows NT 6.1; WOW64; rv:10.0.1) Gecko\/20100101 Firefox\/10.0.1\",\"timestamp\":\"2016-12-26T11:32:16+0900\"}{\"message\":\"time:22\/Dec\/2016:18:08:02 +0900\\thost:20.171.223.57\\tforwardedfor:20.171.223.57\\treq:GET \/category\/finance HTTP\/1.1\\tstatus:200\\tsize:78\\treferer:\/category\/office\\tua:Mozilla\/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident\/5.0)\",\"timestamp\":\"2016-12-26T11:32:16+0900\"}{\"message\":\"time:22\/Dec\/2016:18:08:06 +0900\\thost:212.159.169.49\\tforwardedfor:212.159.169.49\\treq:GET \/item\/computers\/2268 HTTP\/1.1\\tstatus:200\\tsize:139\\treferer:\/item\/networking\/248\\tua:Mozilla\/5.0 (Windows NT 6.1; WOW64) AppleWebKit\/535.11 (KHTML, like Gecko) Chrome\/17.0.963.56 Safari\/535.11\",\"timestamp\":\"2016-12-26T11:32:16+0900\"}{\"message\":\"time:22\/Dec\/2016:18:08:07 +0900\\thost:140.69.110.95\\tforwardedfor:140.69.110.95\\treq:GET \/category\/books HTTP\/1.1\\tstatus:200\\tsize:109\\treferer:-\\tua:Mozilla\/5.0 (Macintosh; Intel Mac OS X 10.6; rv:9.0.1) Gecko\/20100101 Firefox\/9.0.1\",\"timestamp\":\"2016-12-26T11:32:16+0900\"}{\"message\":\"time:22\/Dec\/2016:18:08:09 
+0900\\thost:172.18.127.139\\tforwardedfor:172.18.127.139\\treq:GET \/category\/electronics HTTP\/1.1\\tstatus:200\\tsize:135\\treferer:-\\tua:Mozilla\/5.0 (Windows NT 6.0; rv:10.0.1) Gecko\/20100101 Firefox\/10.0.1\",\"timestamp\":\"2016-12-26T11:32:16+0900\"}{\"message\":\"time:22\/Dec\/2016:18:08:12 +0900\\thost:120.222.102.169\\tforwardedfor:120.222.102.169\\treq:POST \/search\/?c=Computers+Electronics HTTP\/1.1\\tstatus:200\\tsize:128\\treferer:-\\tua:Mozilla\/5.0 (Windows NT 6.1; WOW64) AppleWebKit\/535.11 (KHTML, like Gecko) Chrome\/17.0.963.56 Safari\/535.11\",\"timestamp\":\"2016-12-26T11:32:16+0900\"}{\"message\":\"time:22\/Dec\/2016:18:08:16 +0900\\thost:116.150.211.139\\tforwardedfor:116.150.211.139\\treq:GET \/category\/electronics HTTP\/1.1\\tstatus:200\\tsize:55\\treferer:-\\tua:Mozilla\/4.0 (compatible; MSIE 8.0; Windows NT 6.1; WOW64; Trident\/4.0; GTB7.2; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C)\",\"timestamp\":\"2016-12-26T11:32:16+0900\"}{\"message\":\"time:22\/Dec\/2016:18:08:16 +0900\\thost:192.42.23.199\\tforwardedfor:192.42.23.199\\treq:GET \/category\/networking HTTP\/1.1\\tstatus:200\\tsize:59\\treferer:-\\tua:Mozilla\/5.0 (Windows NT 6.1; WOW64) AppleWebKit\/535.7 (KHTML, like Gecko) Chrome\/16.0.912.77 Safari\/535.7\",\"timestamp\":\"2016-12-26T11:32:16+0900\"}{\"message\":\"time:22\/Dec\/2016:18:08:16 +0900\\thost:220.84.166.98\\tforwardedfor:220.84.166.98\\treq:GET \/category\/toys HTTP\/1.1\\tstatus:200\\tsize:124\\treferer:\/item\/office\/4833\\tua:Mozilla\/5.0 (compatible; Googlebot\/2.1; +http:\/\/www.google.com\/bot.html)\",\"timestamp\":\"2016-12-26T11:32:16+0900\"}\r\n### END Mon Dec 26 11:32:20 JST 2016 
###\r\n<\/code><\/pre>\n<p>\u6211\u6b63\u786e\u5730\u4eceKafka\u4e2d\u83b7\u53d6\u5e76\u6253\u5370\u4e86\u6570\u636e\u3002<\/p>\n<h1>\u6211\u8bd5\u7740\u8fdb\u884c\u7edf\u8ba1\u3002<\/h1>\n<p>\u5c1d\u8bd5\u6bcf5\u79d2\u5bf9\u8bbf\u95ee\u65e5\u5fd7\u7684\u8bf7\u6c42\u8def\u5f84\u7684\u7b2c\u4e00\u7ea7\u8fdb\u884c\u6c47\u603b\uff0c\u4f46\u8981\u6392\u9664\u56fe\u6807\u3001js\u548c\u9519\u8bef\u54cd\u5e94\u3002\u6240\u4ee5\u7b5b\u9009\u6761\u4ef6\u662f status == 200 \u4e14 size &gt;= 100\u3002<\/p>\n<h2>\u6e90\u4ee3\u7801<\/h2>\n<pre class=\"post-pre\"><code><span class=\"k\">package<\/span> <span class=\"nn\">com.test.spark<\/span>\r\n\r\n<span class=\"k\">import<\/span> <span class=\"nn\">java.util.HashMap<\/span>\r\n<span class=\"k\">import<\/span> <span class=\"nn\">java.util.Date<\/span>\r\n<span class=\"k\">import<\/span> <span class=\"nn\">java.util.Calendar<\/span>\r\n<span class=\"k\">import<\/span> <span class=\"nn\">org.apache.kafka.clients.producer.<\/span><span class=\"o\">{<\/span><span class=\"nc\">KafkaProducer<\/span><span class=\"o\">,<\/span> <span class=\"nc\">ProducerConfig<\/span><span class=\"o\">,<\/span> <span class=\"nc\">ProducerRecord<\/span><span class=\"o\">}<\/span>\r\n<span class=\"k\">import<\/span> <span class=\"nn\">org.apache.spark.SparkConf<\/span>\r\n<span class=\"k\">import<\/span> <span class=\"nn\">org.apache.spark.streaming._<\/span>\r\n<span class=\"k\">import<\/span> <span class=\"nn\">org.apache.spark.streaming.kafka._<\/span>\r\n<span class=\"k\">import<\/span> <span class=\"nn\">scala.util.parsing.json.JSON<\/span>\r\n<span class=\"k\">import<\/span> <span class=\"nn\">net.liftweb._<\/span>\r\n<span class=\"k\">import<\/span> <span class=\"nn\">net.liftweb.json._<\/span>\r\n<span class=\"k\">import<\/span> <span class=\"nn\">com.github.seratch.ltsv4s._<\/span>\r\n\r\n<span class=\"k\">object<\/span> <span 
class=\"nc\">KafkaWorker<\/span> <span class=\"o\">{<\/span>\r\n  <span class=\"k\">case<\/span> <span class=\"k\">class<\/span> <span class=\"nc\">FluentEvent<\/span><span class=\"o\">(<\/span>\r\n      <span class=\"n\">timestamp<\/span><span class=\"k\">:<\/span> <span class=\"kt\">String<\/span><span class=\"o\">,<\/span>\r\n      <span class=\"n\">message<\/span><span class=\"k\">:<\/span> <span class=\"kt\">String<\/span>\r\n    <span class=\"o\">)<\/span>\r\n\r\n  <span class=\"k\">def<\/span> <span class=\"nf\">main<\/span><span class=\"o\">(<\/span><span class=\"n\">args<\/span><span class=\"k\">:<\/span> <span class=\"kt\">Array<\/span><span class=\"o\">[<\/span><span class=\"kt\">String<\/span><span class=\"o\">])<\/span> <span class=\"o\">{<\/span>\r\n    <span class=\"c1\">\/\/ zkQuorum(127.0.0.1:2181), group(test), topics(imp), numThreads(2), Sec<\/span>\r\n    <span class=\"nf\">if<\/span> <span class=\"o\">(<\/span><span class=\"nv\">args<\/span><span class=\"o\">.<\/span><span class=\"py\">length<\/span> <span class=\"o\">&lt;<\/span> <span class=\"mi\">5<\/span><span class=\"o\">)<\/span> <span class=\"o\">{<\/span>\r\n      <span class=\"nv\">System<\/span><span class=\"o\">.<\/span><span class=\"py\">exit<\/span><span class=\"o\">(<\/span><span class=\"mi\">1<\/span><span class=\"o\">)<\/span>\r\n    <span class=\"o\">}<\/span>\r\n    <span class=\"k\">val<\/span> <span class=\"nv\">Array<\/span><span class=\"o\">(<\/span><span class=\"n\">zkQuorum<\/span><span class=\"o\">,<\/span> <span class=\"n\">group<\/span><span class=\"o\">,<\/span> <span class=\"n\">topics<\/span><span class=\"o\">,<\/span> <span class=\"n\">numThreads<\/span><span class=\"o\">,<\/span> <span class=\"n\">sec<\/span><span class=\"o\">)<\/span> <span class=\"k\">=<\/span> <span class=\"n\">args<\/span>\r\n    <span class=\"k\">val<\/span> <span class=\"nv\">secSleep<\/span>    <span class=\"k\">=<\/span> <span class=\"nv\">sec<\/span><span class=\"o\">.<\/span><span 
class=\"py\">toInt<\/span>\r\n    <span class=\"k\">val<\/span> <span class=\"nv\">topicMap<\/span>    <span class=\"k\">=<\/span> <span class=\"nv\">topics<\/span><span class=\"o\">.<\/span><span class=\"py\">split<\/span><span class=\"o\">(<\/span><span class=\"s\">\",\"<\/span><span class=\"o\">).<\/span><span class=\"py\">map<\/span><span class=\"o\">((<\/span><span class=\"k\">_<\/span><span class=\"o\">,<\/span> <span class=\"nv\">numThreads<\/span><span class=\"o\">.<\/span><span class=\"py\">toInt<\/span><span class=\"o\">)).<\/span><span class=\"py\">toMap<\/span>\r\n    <span class=\"k\">val<\/span> <span class=\"nv\">sparkConf<\/span>   <span class=\"k\">=<\/span> <span class=\"k\">new<\/span> <span class=\"nc\">SparkConf<\/span><span class=\"o\">().<\/span><span class=\"py\">setAppName<\/span><span class=\"o\">(<\/span><span class=\"s\">\"KafkaWorker\"<\/span><span class=\"o\">)<\/span>\r\n    <span class=\"k\">val<\/span> <span class=\"nv\">ssc<\/span>         <span class=\"k\">=<\/span> <span class=\"k\">new<\/span> <span class=\"nc\">StreamingContext<\/span><span class=\"o\">(<\/span><span class=\"n\">sparkConf<\/span><span class=\"o\">,<\/span> <span class=\"nc\">Seconds<\/span><span class=\"o\">(<\/span><span class=\"n\">secSleep<\/span><span class=\"o\">))<\/span>\r\n    <span class=\"k\">val<\/span> <span class=\"nv\">kafkaStream<\/span> <span class=\"k\">=<\/span> <span class=\"nv\">KafkaUtils<\/span><span class=\"o\">.<\/span><span class=\"py\">createStream<\/span><span class=\"o\">(<\/span><span class=\"n\">ssc<\/span><span class=\"o\">,<\/span> <span class=\"n\">zkQuorum<\/span><span class=\"o\">,<\/span> <span class=\"n\">group<\/span><span class=\"o\">,<\/span> <span class=\"n\">topicMap<\/span><span class=\"o\">).<\/span><span class=\"py\">map<\/span><span class=\"o\">(<\/span><span class=\"nv\">_<\/span><span class=\"o\">.<\/span><span class=\"py\">_2<\/span><span class=\"o\">)<\/span>\r\n\r\n    <span class=\"nv\">ssc<\/span><span 
class=\"o\">.<\/span><span class=\"py\">checkpoint<\/span><span class=\"o\">(<\/span><span class=\"s\">\"checkpoint\"<\/span><span class=\"o\">)<\/span>\r\n\r\n    <span class=\"k\">val<\/span> <span class=\"nv\">nginxStream<\/span> <span class=\"k\">=<\/span> <span class=\"nv\">kafkaStream<\/span><span class=\"o\">.<\/span><span class=\"py\">map<\/span><span class=\"o\">(<\/span><span class=\"nf\">convertFluentToMap<\/span><span class=\"o\">(<\/span><span class=\"k\">_<\/span><span class=\"o\">))<\/span>\r\n    <span class=\"c1\">\/\/ \u30b9\u30c6\u30fc\u30bf\u30b9\u30b3\u30fc\u30c9200\u4ee5\u4e0a\u3067respons\u304c100Byte\u4ee5\u4e0a\u306e\u30ea\u30af\u30a8\u30b9\u30c8\u3092\u30d1\u30b9(\u7b2c1\u968e\u5c64)\u306e\u307f\u62bd\u51fa<\/span>\r\n    <span class=\"k\">val<\/span> <span class=\"nv\">pathsStream<\/span> <span class=\"k\">=<\/span> <span class=\"nv\">nginxStream<\/span><span class=\"o\">.<\/span><span class=\"py\">map<\/span><span class=\"o\">{<\/span><span class=\"n\">nginxRecord<\/span> <span class=\"k\">=&gt;<\/span>\r\n      <span class=\"nf\">if<\/span> <span class=\"o\">(<\/span><span class=\"nf\">nginxRecord<\/span><span class=\"o\">(<\/span><span class=\"s\">\"size\"<\/span><span class=\"o\">).<\/span><span class=\"py\">toInt<\/span> <span class=\"o\">&gt;=<\/span> <span class=\"mi\">100<\/span> <span class=\"o\">&amp;&amp;<\/span> <span class=\"nf\">nginxRecord<\/span><span class=\"o\">(<\/span><span class=\"s\">\"status\"<\/span><span class=\"o\">).<\/span><span class=\"py\">toInt<\/span> <span class=\"o\">==<\/span> <span class=\"mi\">200<\/span> <span class=\"o\">){<\/span>\r\n        <span class=\"nf\">reqToPath<\/span><span class=\"o\">(<\/span><span class=\"nf\">nginxRecord<\/span><span class=\"o\">(<\/span><span class=\"s\">\"req\"<\/span><span class=\"o\">)).<\/span><span class=\"py\">split<\/span><span class=\"o\">(<\/span><span class=\"s\">\"\/\"<\/span><span class=\"o\">)(<\/span><span class=\"mi\">1<\/span><span 
class=\"o\">)<\/span>\r\n      <span class=\"o\">}<\/span>\r\n    <span class=\"o\">}<\/span>\r\n    <span class=\"c1\">\/\/ path\u6bce\u306bcount\u3059\u308b<\/span>\r\n    <span class=\"k\">val<\/span> <span class=\"nv\">countPath<\/span> <span class=\"k\">=<\/span> <span class=\"nv\">pathsStream<\/span><span class=\"o\">.<\/span><span class=\"py\">map<\/span><span class=\"o\">((<\/span><span class=\"k\">_<\/span><span class=\"o\">,<\/span> <span class=\"mi\">1<\/span><span class=\"o\">))<\/span>\r\n      <span class=\"o\">.<\/span><span class=\"py\">reduceByKeyAndWindow<\/span><span class=\"o\">(<\/span><span class=\"k\">_<\/span> <span class=\"o\">+<\/span> <span class=\"k\">_<\/span><span class=\"o\">,<\/span> <span class=\"nc\">Seconds<\/span><span class=\"o\">(<\/span><span class=\"n\">secSleep<\/span><span class=\"o\">))<\/span>\r\n      <span class=\"o\">.<\/span><span class=\"py\">map<\/span><span class=\"o\">{<\/span><span class=\"nf\">case<\/span> <span class=\"o\">(<\/span><span class=\"n\">path<\/span><span class=\"o\">,<\/span> <span class=\"n\">count<\/span><span class=\"o\">)<\/span> <span class=\"k\">=&gt;<\/span> <span class=\"o\">(<\/span><span class=\"n\">count<\/span><span class=\"o\">,<\/span> <span class=\"n\">path<\/span><span class=\"o\">)}<\/span>\r\n      <span class=\"o\">.<\/span><span class=\"py\">transform<\/span><span class=\"o\">(<\/span><span class=\"nv\">_<\/span><span class=\"o\">.<\/span><span class=\"py\">sortByKey<\/span><span class=\"o\">(<\/span><span class=\"kc\">false<\/span><span class=\"o\">))<\/span>\r\n\r\n    <span class=\"c1\">\/\/ OutPut<\/span>\r\n    <span class=\"nv\">countPath<\/span><span class=\"o\">.<\/span><span class=\"py\">foreachRDD<\/span><span class=\"o\">{<\/span> <span class=\"n\">rdd<\/span> <span class=\"k\">=&gt;<\/span> \r\n      <span class=\"nf\">println<\/span><span class=\"o\">(<\/span><span class=\"s\">\"### Start %s ###\"<\/span><span class=\"o\">.<\/span><span 
class=\"py\">format<\/span><span class=\"o\">(<\/span><span class=\"nv\">Calendar<\/span><span class=\"o\">.<\/span><span class=\"py\">getInstance<\/span><span class=\"o\">.<\/span><span class=\"py\">getTime<\/span><span class=\"o\">.<\/span><span class=\"py\">toString<\/span><span class=\"o\">))<\/span>\r\n      <span class=\"k\">val<\/span> <span class=\"nv\">path<\/span> <span class=\"k\">=<\/span> <span class=\"nv\">rdd<\/span><span class=\"o\">.<\/span><span class=\"py\">take<\/span><span class=\"o\">(<\/span><span class=\"mi\">10<\/span><span class=\"o\">)<\/span> \r\n      <span class=\"nv\">path<\/span><span class=\"o\">.<\/span><span class=\"py\">foreach<\/span><span class=\"o\">{<\/span><span class=\"nf\">case<\/span> <span class=\"o\">(<\/span><span class=\"n\">count<\/span><span class=\"o\">,<\/span> <span class=\"n\">tag<\/span><span class=\"o\">)<\/span> <span class=\"k\">=&gt;<\/span> \r\n        <span class=\"n\">tag<\/span> <span class=\"k\">match<\/span> <span class=\"o\">{<\/span>\r\n          <span class=\"k\">case<\/span> <span class=\"n\">tag<\/span><span class=\"k\">:<\/span> <span class=\"kt\">String<\/span> <span class=\"o\">=&gt;<\/span> <span class=\"nf\">println<\/span><span class=\"o\">(<\/span><span class=\"s\">\"%s count (%s)\"<\/span><span class=\"o\">.<\/span><span class=\"py\">format<\/span><span class=\"o\">(<\/span><span class=\"n\">count<\/span><span class=\"o\">,<\/span> <span class=\"n\">tag<\/span><span class=\"o\">))<\/span>\r\n          <span class=\"k\">case<\/span> <span class=\"k\">_<\/span> <span class=\"k\">=&gt;<\/span> <span class=\"nf\">println<\/span><span class=\"o\">(<\/span><span class=\"s\">\"%s count not match\"<\/span><span class=\"o\">.<\/span><span class=\"py\">format<\/span><span class=\"o\">(<\/span><span class=\"n\">count<\/span><span class=\"o\">))<\/span>\r\n        <span class=\"o\">}<\/span>\r\n      <span class=\"o\">}<\/span>\r\n      <span class=\"nf\">println<\/span><span 
class=\"o\">(<\/span><span class=\"s\">\"### END %s ###\\n\"<\/span><span class=\"o\">.<\/span><span class=\"py\">format<\/span><span class=\"o\">(<\/span><span class=\"nv\">Calendar<\/span><span class=\"o\">.<\/span><span class=\"py\">getInstance<\/span><span class=\"o\">.<\/span><span class=\"py\">getTime<\/span><span class=\"o\">.<\/span><span class=\"py\">toString<\/span><span class=\"o\">))<\/span>\r\n    <span class=\"o\">}<\/span>\r\n\r\n    <span class=\"nv\">ssc<\/span><span class=\"o\">.<\/span><span class=\"py\">start<\/span><span class=\"o\">()<\/span>\r\n    <span class=\"nv\">ssc<\/span><span class=\"o\">.<\/span><span class=\"py\">awaitTermination<\/span><span class=\"o\">()<\/span>\r\n  <span class=\"o\">}<\/span>\r\n  <span class=\"k\">def<\/span> <span class=\"nf\">parseNginxLtsv<\/span><span class=\"o\">(<\/span><span class=\"n\">record<\/span><span class=\"k\">:<\/span> <span class=\"kt\">String<\/span><span class=\"o\">)<\/span> <span class=\"k\">=<\/span> <span class=\"o\">{<\/span> <span class=\"nv\">LTSV<\/span><span class=\"o\">.<\/span><span class=\"py\">parseLine<\/span><span class=\"o\">(<\/span><span class=\"n\">record<\/span><span class=\"o\">)<\/span> <span class=\"o\">}<\/span>\r\n  <span class=\"k\">def<\/span> <span class=\"nf\">parseFluentJson<\/span><span class=\"o\">(<\/span><span class=\"n\">record<\/span><span class=\"k\">:<\/span> <span class=\"kt\">String<\/span><span class=\"o\">)<\/span> <span class=\"k\">=<\/span> <span class=\"o\">{<\/span>\r\n    <span class=\"k\">implicit<\/span> <span class=\"k\">val<\/span> <span class=\"nv\">formats<\/span> <span class=\"k\">=<\/span> <span class=\"nc\">DefaultFormats<\/span>\r\n    <span class=\"nf\">parse<\/span><span class=\"o\">(<\/span><span class=\"n\">record<\/span><span class=\"o\">).<\/span><span class=\"py\">extract<\/span><span class=\"o\">[<\/span><span class=\"kt\">FluentEvent<\/span><span class=\"o\">].<\/span><span class=\"py\">message<\/span>\r\n  <span 
class=\"o\">}<\/span>\r\n  <span class=\"k\">def<\/span> <span class=\"nf\">convertFluentToMap<\/span><span class=\"o\">(<\/span><span class=\"n\">record<\/span><span class=\"k\">:<\/span> <span class=\"kt\">String<\/span><span class=\"o\">)<\/span> <span class=\"k\">=<\/span> <span class=\"o\">{<\/span> <span class=\"nf\">parseNginxLtsv<\/span><span class=\"o\">(<\/span><span class=\"nf\">parseFluentJson<\/span><span class=\"o\">(<\/span><span class=\"n\">record<\/span><span class=\"o\">))<\/span> <span class=\"o\">}<\/span>\r\n  <span class=\"k\">def<\/span> <span class=\"nf\">reqToPath<\/span><span class=\"o\">(<\/span><span class=\"n\">record<\/span><span class=\"k\">:<\/span> <span class=\"kt\">String<\/span><span class=\"o\">)<\/span> <span class=\"k\">=<\/span> <span class=\"o\">{<\/span> <span class=\"nv\">record<\/span><span class=\"o\">.<\/span><span class=\"py\">split<\/span><span class=\"o\">(<\/span><span class=\"s\">\" \"<\/span><span class=\"o\">)(<\/span><span class=\"mi\">1<\/span><span class=\"o\">)<\/span> <span class=\"o\">}<\/span>\r\n<span class=\"o\">}<\/span>\r\n<\/code><\/pre>\n<h2>\u8bd5\u7740\u8fd0\u884c\u4e00\u4e0b<\/h2>\n<pre class=\"post-pre\"><code><span class=\"nv\">$ <\/span>sbt assembly\r\n<span class=\"nv\">$ <\/span><span class=\"nb\">cd<\/span> \/opt\/spark\r\n<span class=\"nv\">$ <\/span>.\/bin\/spark-submit <span class=\"nt\">--class<\/span> com.test.spark.KafkaWorker  \/root\/sample\/target\/scala-2.11\/Test-assembly-1.0.jar 127.0.0.1:2181 <span class=\"nb\">test <\/span>nginx 2 5\r\n<span class=\"c\">### Start Mon Dec 26 12:32:25 JST 2016 ###                                      <\/span>\r\n879 count not match\r\n285 count <span class=\"o\">(<\/span>category<span class=\"o\">)<\/span>\r\n190 count <span class=\"o\">(<\/span>item<span class=\"o\">)<\/span>\r\n48 count <span class=\"o\">(<\/span>search<span class=\"o\">)<\/span>\r\n<span class=\"c\">### END Mon Dec 26 12:32:25 JST 2016 ###<\/span>\r\n\r\n<span class=\"c\">### Start 
Mon Dec 26 12:32:30 JST 2016 ###<\/span>\r\n802 count not match\r\n267 count <span class=\"o\">(<\/span>category<span class=\"o\">)<\/span>\r\n175 count <span class=\"o\">(<\/span>item<span class=\"o\">)<\/span>\r\n38 count <span class=\"o\">(<\/span>search<span class=\"o\">)<\/span>\r\n<span class=\"c\">### END Mon Dec 26 12:32:30 JST 2016 ###<\/span>\r\n\r\n<span class=\"c\">### Start Mon Dec 26 12:32:35 JST 2016 ###<\/span>\r\n895 count not match\r\n321 count <span class=\"o\">(<\/span>category<span class=\"o\">)<\/span>\r\n181 count <span class=\"o\">(<\/span>item<span class=\"o\">)<\/span>\r\n53 count <span class=\"o\">(<\/span>search<span class=\"o\">)<\/span>\r\n<span class=\"c\">### END Mon Dec 26 12:32:35 JST 2016 ###<\/span>\r\n<\/code><\/pre>\n<p>\u5982\u679c\u6709\u65f6\u95f4\u7684\u8bdd\uff0c\u6211\u60f3\u8981\u8bd5\u8bd5Fluent\u7684DataCounter\u3001Norikura\u7b49\u6027\u80fd\u6d4b\u8bd5\u3002<\/p>\n","protected":false},"excerpt":{"rendered":"<p>\u30a4\u30f3\u30d5\u30e9\u30a8\u30f3\u30b8\u30cb\u30a2\u3063\u307d\u304fNginx\u306e\u30a2\u30af\u30bb\u30b9\u30ed\u30b0\u3092\u30ea\u30a2\u30eb\u30bf\u30a4\u30e0\u96c6\u8a08\u3057\u3066\u307f\u308b &nbsp; Sample\u304cSca [&hellip;]<\/p>\n","protected":false},"author":3,"featured_media":0,"comment_status":"closed","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[1],"tags":[],"class_list":["post-46906","post","type-post","status-publish","format-standard","hentry","category-uncategorized"],"yoast_head":"<!-- This site is optimized with the Yoast SEO Premium plugin v21.5 (Yoast SEO v21.5) - https:\/\/yoast.com\/wordpress\/plugins\/seo\/ -->\n<title>\u4f5c\u4e3a\u4e00\u540d\u57fa\u7840\u8bbe\u65bd\u5de5\u7a0b\u5e08\uff0c\u6211\u5c1d\u8bd5\u4f7f\u7528Spark Streaming\u548cKafka\u6765\u805a\u5408\u8bbf\u95ee\u65e5\u5fd7 - Blog - Silicon Cloud<\/title>\n<meta name=\"robots\" content=\"index, follow, max-snippet:-1, max-image-preview:large, 
max-video-preview:-1\" \/>\n<link rel=\"canonical\" href=\"https:\/\/www.silicloud.com\/zh\/blog\/\u4f5c\u4e3a\u4e00\u540d\u57fa\u7840\u8bbe\u65bd\u5de5\u7a0b\u5e08\uff0c\u6211\u5c1d\u8bd5\u4f7f\u7528spark-streaming\u548ckafka\u6765\u805a\/\" \/>\n<meta property=\"og:locale\" content=\"zh_CN\" \/>\n<meta property=\"og:type\" content=\"article\" \/>\n<meta property=\"og:title\" content=\"\u4f5c\u4e3a\u4e00\u540d\u57fa\u7840\u8bbe\u65bd\u5de5\u7a0b\u5e08\uff0c\u6211\u5c1d\u8bd5\u4f7f\u7528Spark Streaming\u548cKafka\u6765\u805a\u5408\u8bbf\u95ee\u65e5\u5fd7\" \/>\n<meta property=\"og:description\" content=\"\u30a4\u30f3\u30d5\u30e9\u30a8\u30f3\u30b8\u30cb\u30a2\u3063\u307d\u304fNginx\u306e\u30a2\u30af\u30bb\u30b9\u30ed\u30b0\u3092\u30ea\u30a2\u30eb\u30bf\u30a4\u30e0\u96c6\u8a08\u3057\u3066\u307f\u308b &nbsp; Sample\u304cSca [&hellip;]\" \/>\n<meta property=\"og:url\" content=\"https:\/\/www.silicloud.com\/zh\/blog\/\u4f5c\u4e3a\u4e00\u540d\u57fa\u7840\u8bbe\u65bd\u5de5\u7a0b\u5e08\uff0c\u6211\u5c1d\u8bd5\u4f7f\u7528spark-streaming\u548ckafka\u6765\u805a\/\" \/>\n<meta property=\"og:site_name\" content=\"Blog - Silicon Cloud\" \/>\n<meta property=\"article:published_time\" content=\"2023-04-22T15:47:38+00:00\" \/>\n<meta property=\"article:modified_time\" content=\"2024-05-03T16:50:20+00:00\" \/>\n<meta name=\"author\" content=\"\u97f5, \u79d1\" \/>\n<meta name=\"twitter:card\" content=\"summary_large_image\" \/>\n<meta name=\"twitter:label1\" content=\"\u4f5c\u8005\" \/>\n\t<meta name=\"twitter:data1\" content=\"\u97f5, \u79d1\" \/>\n\t<meta name=\"twitter:label2\" content=\"\u9884\u8ba1\u9605\u8bfb\u65f6\u95f4\" \/>\n\t<meta name=\"twitter:data2\" content=\"7 \u5206\" \/>\n<script type=\"application\/ld+json\" 
class=\"yoast-schema-graph\">{\"@context\":\"https:\/\/schema.org\",\"@graph\":[{\"@type\":\"WebPage\",\"@id\":\"https:\/\/www.silicloud.com\/zh\/blog\/%e4%bd%9c%e4%b8%ba%e4%b8%80%e5%90%8d%e5%9f%ba%e7%a1%80%e8%ae%be%e6%96%bd%e5%b7%a5%e7%a8%8b%e5%b8%88%ef%bc%8c%e6%88%91%e5%b0%9d%e8%af%95%e4%bd%bf%e7%94%a8spark-streaming%e5%92%8ckafka%e6%9d%a5%e8%81%9a\/\",\"url\":\"https:\/\/www.silicloud.com\/zh\/blog\/%e4%bd%9c%e4%b8%ba%e4%b8%80%e5%90%8d%e5%9f%ba%e7%a1%80%e8%ae%be%e6%96%bd%e5%b7%a5%e7%a8%8b%e5%b8%88%ef%bc%8c%e6%88%91%e5%b0%9d%e8%af%95%e4%bd%bf%e7%94%a8spark-streaming%e5%92%8ckafka%e6%9d%a5%e8%81%9a\/\",\"name\":\"\u4f5c\u4e3a\u4e00\u540d\u57fa\u7840\u8bbe\u65bd\u5de5\u7a0b\u5e08\uff0c\u6211\u5c1d\u8bd5\u4f7f\u7528Spark Streaming\u548cKafka\u6765\u805a\u5408\u8bbf\u95ee\u65e5\u5fd7 - Blog - Silicon Cloud\",\"isPartOf\":{\"@id\":\"https:\/\/www.silicloud.com\/zh\/blog\/#website\"},\"datePublished\":\"2023-04-22T15:47:38+00:00\",\"dateModified\":\"2024-05-03T16:50:20+00:00\",\"author\":{\"@id\":\"https:\/\/www.silicloud.com\/zh\/blog\/#\/schema\/person\/6530331a63adef3b3443a1fab53a0e6e\"},\"breadcrumb\":{\"@id\":\"https:\/\/www.silicloud.com\/zh\/blog\/%e4%bd%9c%e4%b8%ba%e4%b8%80%e5%90%8d%e5%9f%ba%e7%a1%80%e8%ae%be%e6%96%bd%e5%b7%a5%e7%a8%8b%e5%b8%88%ef%bc%8c%e6%88%91%e5%b0%9d%e8%af%95%e4%bd%bf%e7%94%a8spark-streaming%e5%92%8ckafka%e6%9d%a5%e8%81%9a\/#breadcrumb\"},\"inLanguage\":\"zh-Hans\",\"potentialAction\":[{\"@type\":\"ReadAction\",\"target\":[\"https:\/\/www.silicloud.com\/zh\/blog\/%e4%bd%9c%e4%b8%ba%e4%b8%80%e5%90%8d%e5%9f%ba%e7%a1%80%e8%ae%be%e6%96%bd%e5%b7%a5%e7%a8%8b%e5%b8%88%ef%bc%8c%e6%88%91%e5%b0%9d%e8%af%95%e4%bd%bf%e7%94%a8spark-streaming%e5%92%8ckafka%e6%9d%a5%e8%81%9a\/\"]}]},{\"@type\":\"BreadcrumbList\",\"@id\":\"https:\/\/www.silicloud.com\/zh\/blog\/%e4%bd%9c%e4%b8%ba%e4%b8%80%e5%90%8d%e5%9f%ba%e7%a1%80%e8%ae%be%e6%96%bd%e5%b7%a5%e7%a8%8b%e5%b8%88%ef%bc%8c%e6%88%91%e5%b0%9d%e8%af%95%e4%bd%bf%e7%94%a8spark-streaming%e5%92%8ckafka%e6%9d%a5%e8%81%9
a\/#breadcrumb\",\"itemListElement\":[{\"@type\":\"ListItem\",\"position\":1,\"name\":\"\u9996\u9875\",\"item\":\"https:\/\/www.silicloud.com\/zh\/blog\/\"},{\"@type\":\"ListItem\",\"position\":2,\"name\":\"\u4f5c\u4e3a\u4e00\u540d\u57fa\u7840\u8bbe\u65bd\u5de5\u7a0b\u5e08\uff0c\u6211\u5c1d\u8bd5\u4f7f\u7528Spark Streaming\u548cKafka\u6765\u805a\u5408\u8bbf\u95ee\u65e5\u5fd7\"}]},{\"@type\":\"WebSite\",\"@id\":\"https:\/\/www.silicloud.com\/zh\/blog\/#website\",\"url\":\"https:\/\/www.silicloud.com\/zh\/blog\/\",\"name\":\"Blog - Silicon Cloud\",\"description\":\"\",\"inLanguage\":\"zh-Hans\"},{\"@type\":\"Person\",\"@id\":\"https:\/\/www.silicloud.com\/zh\/blog\/#\/schema\/person\/6530331a63adef3b3443a1fab53a0e6e\",\"name\":\"\u97f5, \u79d1\",\"image\":{\"@type\":\"ImageObject\",\"inLanguage\":\"zh-Hans\",\"@id\":\"https:\/\/www.silicloud.com\/zh\/blog\/#\/schema\/person\/image\/\",\"url\":\"https:\/\/secure.gravatar.com\/avatar\/429ccb39b3fff5188bc17986222cfb0936cbadb8cc933cff04ab5ca01bd30a08?s=96&d=mm&r=g\",\"contentUrl\":\"https:\/\/secure.gravatar.com\/avatar\/429ccb39b3fff5188bc17986222cfb0936cbadb8cc933cff04ab5ca01bd30a08?s=96&d=mm&r=g\",\"caption\":\"\u97f5, \u79d1\"},\"url\":\"https:\/\/www.silicloud.com\/zh\/blog\/author\/yunke\/\"},{\"@type\":\"ImageObject\",\"inLanguage\":\"zh-Hans\",\"@id\":\"https:\/\/www.silicloud.com\/zh\/blog\/%e4%bd%9c%e4%b8%ba%e4%b8%80%e5%90%8d%e5%9f%ba%e7%a1%80%e8%ae%be%e6%96%bd%e5%b7%a5%e7%a8%8b%e5%b8%88%ef%bc%8c%e6%88%91%e5%b0%9d%e8%af%95%e4%bd%bf%e7%94%a8spark-streaming%e5%92%8ckafka%e6%9d%a5%e8%81%9a\/#local-main-organization-logo\",\"url\":\"\",\"contentUrl\":\"\",\"caption\":\"Blog - Silicon Cloud\"}]}<\/script>\n<!-- \/ Yoast SEO Premium plugin. 
-->","yoast_head_json":{"title":"\u4f5c\u4e3a\u4e00\u540d\u57fa\u7840\u8bbe\u65bd\u5de5\u7a0b\u5e08\uff0c\u6211\u5c1d\u8bd5\u4f7f\u7528Spark Streaming\u548cKafka\u6765\u805a\u5408\u8bbf\u95ee\u65e5\u5fd7 - Blog - Silicon Cloud","robots":{"index":"index","follow":"follow","max-snippet":"max-snippet:-1","max-image-preview":"max-image-preview:large","max-video-preview":"max-video-preview:-1"},"canonical":"https:\/\/www.silicloud.com\/zh\/blog\/\u4f5c\u4e3a\u4e00\u540d\u57fa\u7840\u8bbe\u65bd\u5de5\u7a0b\u5e08\uff0c\u6211\u5c1d\u8bd5\u4f7f\u7528spark-streaming\u548ckafka\u6765\u805a\/","og_locale":"zh_CN","og_type":"article","og_title":"\u4f5c\u4e3a\u4e00\u540d\u57fa\u7840\u8bbe\u65bd\u5de5\u7a0b\u5e08\uff0c\u6211\u5c1d\u8bd5\u4f7f\u7528Spark Streaming\u548cKafka\u6765\u805a\u5408\u8bbf\u95ee\u65e5\u5fd7","og_description":"\u30a4\u30f3\u30d5\u30e9\u30a8\u30f3\u30b8\u30cb\u30a2\u3063\u307d\u304fNginx\u306e\u30a2\u30af\u30bb\u30b9\u30ed\u30b0\u3092\u30ea\u30a2\u30eb\u30bf\u30a4\u30e0\u96c6\u8a08\u3057\u3066\u307f\u308b &nbsp; Sample\u304cSca [&hellip;]","og_url":"https:\/\/www.silicloud.com\/zh\/blog\/\u4f5c\u4e3a\u4e00\u540d\u57fa\u7840\u8bbe\u65bd\u5de5\u7a0b\u5e08\uff0c\u6211\u5c1d\u8bd5\u4f7f\u7528spark-streaming\u548ckafka\u6765\u805a\/","og_site_name":"Blog - Silicon Cloud","article_published_time":"2023-04-22T15:47:38+00:00","article_modified_time":"2024-05-03T16:50:20+00:00","author":"\u97f5, \u79d1","twitter_card":"summary_large_image","twitter_misc":{"\u4f5c\u8005":"\u97f5, \u79d1","\u9884\u8ba1\u9605\u8bfb\u65f6\u95f4":"7 
\u5206"},"schema":{"@context":"https:\/\/schema.org","@graph":[{"@type":"WebPage","@id":"https:\/\/www.silicloud.com\/zh\/blog\/%e4%bd%9c%e4%b8%ba%e4%b8%80%e5%90%8d%e5%9f%ba%e7%a1%80%e8%ae%be%e6%96%bd%e5%b7%a5%e7%a8%8b%e5%b8%88%ef%bc%8c%e6%88%91%e5%b0%9d%e8%af%95%e4%bd%bf%e7%94%a8spark-streaming%e5%92%8ckafka%e6%9d%a5%e8%81%9a\/","url":"https:\/\/www.silicloud.com\/zh\/blog\/%e4%bd%9c%e4%b8%ba%e4%b8%80%e5%90%8d%e5%9f%ba%e7%a1%80%e8%ae%be%e6%96%bd%e5%b7%a5%e7%a8%8b%e5%b8%88%ef%bc%8c%e6%88%91%e5%b0%9d%e8%af%95%e4%bd%bf%e7%94%a8spark-streaming%e5%92%8ckafka%e6%9d%a5%e8%81%9a\/","name":"\u4f5c\u4e3a\u4e00\u540d\u57fa\u7840\u8bbe\u65bd\u5de5\u7a0b\u5e08\uff0c\u6211\u5c1d\u8bd5\u4f7f\u7528Spark Streaming\u548cKafka\u6765\u805a\u5408\u8bbf\u95ee\u65e5\u5fd7 - Blog - Silicon Cloud","isPartOf":{"@id":"https:\/\/www.silicloud.com\/zh\/blog\/#website"},"datePublished":"2023-04-22T15:47:38+00:00","dateModified":"2024-05-03T16:50:20+00:00","author":{"@id":"https:\/\/www.silicloud.com\/zh\/blog\/#\/schema\/person\/6530331a63adef3b3443a1fab53a0e6e"},"breadcrumb":{"@id":"https:\/\/www.silicloud.com\/zh\/blog\/%e4%bd%9c%e4%b8%ba%e4%b8%80%e5%90%8d%e5%9f%ba%e7%a1%80%e8%ae%be%e6%96%bd%e5%b7%a5%e7%a8%8b%e5%b8%88%ef%bc%8c%e6%88%91%e5%b0%9d%e8%af%95%e4%bd%bf%e7%94%a8spark-streaming%e5%92%8ckafka%e6%9d%a5%e8%81%9a\/#breadcrumb"},"inLanguage":"zh-Hans","potentialAction":[{"@type":"ReadAction","target":["https:\/\/www.silicloud.com\/zh\/blog\/%e4%bd%9c%e4%b8%ba%e4%b8%80%e5%90%8d%e5%9f%ba%e7%a1%80%e8%ae%be%e6%96%bd%e5%b7%a5%e7%a8%8b%e5%b8%88%ef%bc%8c%e6%88%91%e5%b0%9d%e8%af%95%e4%bd%bf%e7%94%a8spark-streaming%e5%92%8ckafka%e6%9d%a5%e8%81%9a\/"]}]},{"@type":"BreadcrumbList","@id":"https:\/\/www.silicloud.com\/zh\/blog\/%e4%bd%9c%e4%b8%ba%e4%b8%80%e5%90%8d%e5%9f%ba%e7%a1%80%e8%ae%be%e6%96%bd%e5%b7%a5%e7%a8%8b%e5%b8%88%ef%bc%8c%e6%88%91%e5%b0%9d%e8%af%95%e4%bd%bf%e7%94%a8spark-streaming%e5%92%8ckafka%e6%9d%a5%e8%81%9a\/#breadcrumb","itemListElement":[{"@type":"ListItem","position":1,"name":"\u99
96\u9875","item":"https:\/\/www.silicloud.com\/zh\/blog\/"},{"@type":"ListItem","position":2,"name":"\u4f5c\u4e3a\u4e00\u540d\u57fa\u7840\u8bbe\u65bd\u5de5\u7a0b\u5e08\uff0c\u6211\u5c1d\u8bd5\u4f7f\u7528Spark Streaming\u548cKafka\u6765\u805a\u5408\u8bbf\u95ee\u65e5\u5fd7"}]},{"@type":"WebSite","@id":"https:\/\/www.silicloud.com\/zh\/blog\/#website","url":"https:\/\/www.silicloud.com\/zh\/blog\/","name":"Blog - Silicon Cloud","description":"","inLanguage":"zh-Hans"},{"@type":"Person","@id":"https:\/\/www.silicloud.com\/zh\/blog\/#\/schema\/person\/6530331a63adef3b3443a1fab53a0e6e","name":"\u97f5, \u79d1","image":{"@type":"ImageObject","inLanguage":"zh-Hans","@id":"https:\/\/www.silicloud.com\/zh\/blog\/#\/schema\/person\/image\/","url":"https:\/\/secure.gravatar.com\/avatar\/429ccb39b3fff5188bc17986222cfb0936cbadb8cc933cff04ab5ca01bd30a08?s=96&d=mm&r=g","contentUrl":"https:\/\/secure.gravatar.com\/avatar\/429ccb39b3fff5188bc17986222cfb0936cbadb8cc933cff04ab5ca01bd30a08?s=96&d=mm&r=g","caption":"\u97f5, \u79d1"},"url":"https:\/\/www.silicloud.com\/zh\/blog\/author\/yunke\/"},{"@type":"ImageObject","inLanguage":"zh-Hans","@id":"https:\/\/www.silicloud.com\/zh\/blog\/%e4%bd%9c%e4%b8%ba%e4%b8%80%e5%90%8d%e5%9f%ba%e7%a1%80%e8%ae%be%e6%96%bd%e5%b7%a5%e7%a8%8b%e5%b8%88%ef%bc%8c%e6%88%91%e5%b0%9d%e8%af%95%e4%bd%bf%e7%94%a8spark-streaming%e5%92%8ckafka%e6%9d%a5%e8%81%9a\/#local-main-organization-logo","url":"","contentUrl":"","caption":"Blog - Silicon 
Cloud"}]}},"_links":{"self":[{"href":"https:\/\/www.silicloud.com\/zh\/blog\/wp-json\/wp\/v2\/posts\/46906","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/www.silicloud.com\/zh\/blog\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/www.silicloud.com\/zh\/blog\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/www.silicloud.com\/zh\/blog\/wp-json\/wp\/v2\/users\/3"}],"replies":[{"embeddable":true,"href":"https:\/\/www.silicloud.com\/zh\/blog\/wp-json\/wp\/v2\/comments?post=46906"}],"version-history":[{"count":2,"href":"https:\/\/www.silicloud.com\/zh\/blog\/wp-json\/wp\/v2\/posts\/46906\/revisions"}],"predecessor-version":[{"id":95117,"href":"https:\/\/www.silicloud.com\/zh\/blog\/wp-json\/wp\/v2\/posts\/46906\/revisions\/95117"}],"wp:attachment":[{"href":"https:\/\/www.silicloud.com\/zh\/blog\/wp-json\/wp\/v2\/media?parent=46906"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/www.silicloud.com\/zh\/blog\/wp-json\/wp\/v2\/categories?post=46906"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/www.silicloud.com\/zh\/blog\/wp-json\/wp\/v2\/tags?post=46906"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}