{"id":46898,"date":"2023-08-12T01:26:54","date_gmt":"2023-11-02T12:52:16","guid":{"rendered":"https:\/\/www.silicloud.com\/zh\/blog\/%e6%9c%89%e5%85%b3kafka%e5%92%8cspark-streaming%e7%9a%84%e9%9b%86%e6%88%90%e3%80%82\/"},"modified":"2024-04-30T18:10:09","modified_gmt":"2024-04-30T10:10:09","slug":"%e6%9c%89%e5%85%b3kafka%e5%92%8cspark-streaming%e7%9a%84%e9%9b%86%e6%88%90%e3%80%82","status":"publish","type":"post","link":"https:\/\/www.silicloud.com\/zh\/blog\/%e6%9c%89%e5%85%b3kafka%e5%92%8cspark-streaming%e7%9a%84%e9%9b%86%e6%88%90%e3%80%82\/","title":{"rendered":"\u6709\u5173Kafka\u548cSpark Streaming\u7684\u96c6\u6210"},"content":{"rendered":"<p>\u5728\u8fdb\u884cWeb\u8bbf\u95ee\u5206\u6790\u3001\u5b9e\u65f6\u65e5\u5fd7\u76d1\u63a7\u548c\u5f02\u5e38\u68c0\u6d4b\u3001\u793e\u4ea4\u5a92\u4f53\u5206\u6790\u7b49\u5de5\u4f5c\u65f6\uff0c\u4eba\u4eec\u5e38\u5e38\u4f1a\u5c06\u5f00\u6e90\u7684\u5206\u5e03\u5f0f\u6d41\u5904\u7406\u5e73\u53f0Apache Kafka\u4e0e\u57fa\u4e8eSpark\u5904\u7406\u6d41\u6570\u636e\u7684Spark Streaming\u7ec4\u5408\u4f7f\u7528\u3002\u672c\u6587\u5c06\u4ee5Twitter\u6d88\u606f\u5206\u6790\u4e3a\u4f8b\uff0c\u4ecb\u7ecd\u5982\u4f55\u5728\u963f\u91cc\u5df4\u5df4\u4e91\u7684E-MapReduce\u4e0a\u96c6\u6210Kafka\u548cSpark Streaming\u3002<\/p>\n<h4>\u5173\u4e8e\u9a8c\u8bc1\u73af\u5883<\/h4>\n<h5>\u5b9e\u65f6\u6d41\u5904\u7406<\/h5>\n<ul class=\"post-ul\">\n<li style=\"list-style-type: none;\">\n<ul class=\"post-ul\">EMR-3.20.0<\/ul>\n<\/li>\n<\/ul>\n<p>&nbsp;<\/p>\n<ul class=\"post-ul\">\n<li style=\"list-style-type: none;\">\n<ul class=\"post-ul\">\u96c6\u7fa4\u7c7b\u578b\u4e3a Hadoop<\/ul>\n<\/li>\n<\/ul>\n<p>&nbsp;<\/p>\n<ul class=\"post-ul\">\n<li style=\"list-style-type: none;\">\n<ul class=\"post-ul\">\u786c\u4ef6\u914d\u7f6e(Header)\u4e3a1\u53f0ecs.sn2.large<\/ul>\n<\/li>\n<\/ul>\n<p>&nbsp;<\/p>\n<ul 
class=\"post-ul\">\u786c\u4ef6\u914d\u7f6e(Worker)\u4e3a2\u53f0ecs.sn2.large<\/ul>\n<pre class=\"post-pre\"><code><span class=\"c1\"># cat \/etc\/redhat-release<\/span>\r\n<span class=\"no\">CentOS<\/span> <span class=\"no\">Linux<\/span> <span class=\"n\">release<\/span> <span class=\"mf\">7.4<\/span><span class=\"o\">.<\/span><span class=\"mi\">1708<\/span> <span class=\"p\">(<\/span><span class=\"no\">Core<\/span><span class=\"p\">)<\/span> \r\n<span class=\"c1\"># uname -r<\/span>\r\n<span class=\"mf\">3.10<\/span><span class=\"o\">.<\/span><span class=\"mi\">0<\/span><span class=\"o\">-<\/span><span class=\"mf\">693.2<\/span><span class=\"o\">.<\/span><span class=\"mi\">2<\/span><span class=\"p\">.<\/span><span class=\"nf\">el7<\/span><span class=\"p\">.<\/span><span class=\"nf\">x86_64<\/span>\r\n<span class=\"c1\"># hadoop version<\/span>\r\n<span class=\"no\">Hadoop<\/span> <span class=\"mf\">2.8<\/span><span class=\"o\">.<\/span><span class=\"mi\">5<\/span>\r\n<span class=\"no\">Subversion<\/span> <span class=\"no\">Unknown<\/span> <span class=\"o\">-<\/span><span class=\"n\">r<\/span> <span class=\"n\">f6ae72b25f6dbb9925f0850ac865274d31a28e30<\/span>\r\n<span class=\"no\">Compiled<\/span> <span class=\"n\">by<\/span> <span class=\"n\">root<\/span> <span class=\"n\">on<\/span> <span class=\"mi\">2019<\/span><span class=\"o\">-<\/span><span class=\"mo\">04<\/span><span class=\"o\">-<\/span><span class=\"mi\">19<\/span><span class=\"no\">T06<\/span><span class=\"p\">:<\/span><span class=\"mi\">38<\/span><span class=\"no\">Z<\/span>\r\n<span class=\"no\">Compiled<\/span> <span class=\"n\">with<\/span> <span class=\"n\">protoc<\/span> <span class=\"mf\">2.5<\/span><span class=\"o\">.<\/span><span class=\"mi\">0<\/span>\r\n<span class=\"no\">From<\/span> <span class=\"n\">source<\/span> <span class=\"n\">with<\/span> <span class=\"n\">checksum<\/span> <span class=\"mi\">9624<\/span><span 
class=\"n\">fc19bc23f1bbeacb1ae4bee88e7<\/span>\r\n<span class=\"no\">This<\/span> <span class=\"n\">command<\/span> <span class=\"n\">was<\/span> <span class=\"n\">run<\/span> <span class=\"n\">using<\/span> <span class=\"sr\">\/opt\/<\/span><span class=\"n\">apps<\/span><span class=\"o\">\/<\/span><span class=\"n\">ecm<\/span><span class=\"o\">\/<\/span><span class=\"n\">service<\/span><span class=\"o\">\/<\/span><span class=\"n\">hadoop<\/span><span class=\"o\">\/<\/span><span class=\"mf\">2.8<\/span><span class=\"o\">.<\/span><span class=\"mi\">5<\/span><span class=\"o\">-<\/span><span class=\"mf\">1.3<\/span><span class=\"o\">.<\/span><span class=\"mi\">0<\/span><span class=\"o\">\/<\/span><span class=\"n\">package<\/span><span class=\"o\">\/<\/span><span class=\"n\">hadoop<\/span><span class=\"o\">-<\/span><span class=\"mf\">2.8<\/span><span class=\"o\">.<\/span><span class=\"mi\">5<\/span><span class=\"o\">-<\/span><span class=\"mf\">1.3<\/span><span class=\"o\">.<\/span><span class=\"mi\">0<\/span><span class=\"o\">\/<\/span><span class=\"n\">share<\/span><span class=\"o\">\/<\/span><span class=\"n\">hadoop<\/span><span class=\"o\">\/<\/span><span class=\"n\">common<\/span><span class=\"o\">\/<\/span><span class=\"n\">hadoop<\/span><span class=\"o\">-<\/span><span class=\"n\">common<\/span><span class=\"o\">-<\/span><span class=\"mf\">2.8<\/span><span class=\"o\">.<\/span><span class=\"mi\">5<\/span><span class=\"p\">.<\/span><span class=\"nf\">jar<\/span>\r\n<span class=\"c1\"># spark-submit --version<\/span>\r\n<span class=\"no\">Welcome<\/span> <span class=\"n\">to<\/span>\r\n      <span class=\"n\">____<\/span>              <span class=\"n\">__<\/span>\r\n     <span class=\"sr\">\/ __\/<\/span><span class=\"n\">__<\/span>  <span class=\"n\">___<\/span> <span class=\"n\">_____<\/span><span class=\"o\">\/<\/span> <span class=\"sr\">\/__\r\n    _\\ \\\/ _ \\\/ _ `\/<\/span> <span class=\"n\">__<\/span><span class=\"o\">\/<\/span>  <span 
class=\"err\">'<\/span><span class=\"n\">_<\/span><span class=\"o\">\/<\/span>\r\n   <span class=\"sr\">\/___\/<\/span> <span class=\"p\">.<\/span><span class=\"nf\">__<\/span><span class=\"o\">\/<\/span><span class=\"p\">\\<\/span><span class=\"n\">_<\/span><span class=\"p\">,<\/span><span class=\"n\">_<\/span><span class=\"o\">\/<\/span><span class=\"n\">_<\/span><span class=\"o\">\/<\/span> <span class=\"sr\">\/_\/<\/span><span class=\"p\">\\<\/span><span class=\"n\">_<\/span><span class=\"p\">\\<\/span>   <span class=\"n\">version<\/span> <span class=\"mf\">2.4<\/span><span class=\"o\">.<\/span><span class=\"mi\">2<\/span>\r\n      <span class=\"sr\">\/_\/<\/span>\r\n\r\n<span class=\"no\">Using<\/span> <span class=\"no\">Scala<\/span> <span class=\"n\">version<\/span> <span class=\"mf\">2.11<\/span><span class=\"o\">.<\/span><span class=\"mi\">12<\/span><span class=\"p\">,<\/span> <span class=\"no\">OpenJDK<\/span> <span class=\"mi\">64<\/span><span class=\"o\">-<\/span><span class=\"no\">Bit<\/span> <span class=\"no\">Server<\/span> <span class=\"no\">VM<\/span><span class=\"p\">,<\/span> <span class=\"mf\">1.8<\/span><span class=\"o\">.<\/span><span class=\"mo\">0_151<\/span>\r\n<span class=\"no\">Branch<\/span> <span class=\"n\">branch<\/span><span class=\"o\">-<\/span><span class=\"mf\">2.4<\/span><span class=\"o\">.<\/span><span class=\"mi\">2<\/span>\r\n<span class=\"no\">Compiled<\/span> <span class=\"n\">by<\/span> <span class=\"n\">user<\/span> <span class=\"n\">root<\/span> <span class=\"n\">on<\/span> <span class=\"mi\">2019<\/span><span class=\"o\">-<\/span><span class=\"mo\">04<\/span><span class=\"o\">-<\/span><span class=\"mi\">26<\/span><span class=\"no\">T06<\/span><span class=\"p\">:<\/span><span class=\"mi\">32<\/span><span class=\"p\">:<\/span><span class=\"mi\">50<\/span><span class=\"no\">Z<\/span>\r\n<span class=\"no\">Revision<\/span> <span class=\"n\">ef3e6ff4b20dc86ac81291664bb0ed3cf2641fea<\/span>\r\n<span class=\"no\">Url<\/span> 
<span class=\"sr\">\/root\/<\/span><span class=\"n\">git<\/span><span class=\"o\">-<\/span><span class=\"n\">repos<\/span><span class=\"o\">\/<\/span><span class=\"n\">emr<\/span><span class=\"o\">-<\/span><span class=\"n\">spark<\/span><span class=\"p\">.<\/span><span class=\"nf\">git<\/span><span class=\"o\">\/<\/span>\r\n<span class=\"no\">Type<\/span> <span class=\"o\">--<\/span><span class=\"n\">help<\/span> <span class=\"k\">for<\/span> <span class=\"n\">more<\/span> <span class=\"n\">information<\/span><span class=\"o\">.<\/span>\r\n\r\n<\/code><\/pre>\n<h5>Kafka<\/h5>\n<ul class=\"post-ul\">\n<li style=\"list-style-type: none;\">\n<ul class=\"post-ul\">EMR-3.20.0<\/ul>\n<\/li>\n<\/ul>\n<p>&nbsp;<\/p>\n<ul class=\"post-ul\">\n<li style=\"list-style-type: none;\">\n<ul class=\"post-ul\">Zookeeper 3.4.13<\/ul>\n<\/li>\n<\/ul>\n<p>&nbsp;<\/p>\n<ul class=\"post-ul\">\n<li style=\"list-style-type: none;\">\n<ul class=\"post-ul\">Kafka 1.1.1<\/ul>\n<\/li>\n<\/ul>\n<p>&nbsp;<\/p>\n<ul class=\"post-ul\">\n<li style=\"list-style-type: none;\">\n<ul class=\"post-ul\">\u96c6\u7fa4\u7c7b\u578b\u4e3a Kafka<\/ul>\n<\/li>\n<\/ul>\n<p>&nbsp;<\/p>\n<ul class=\"post-ul\">\n<li style=\"list-style-type: none;\">\n<ul class=\"post-ul\">\u786c\u4ef6\u914d\u7f6e(Header)\u4e3a1\u53f0ecs.sn2.large<\/ul>\n<\/li>\n<\/ul>\n<p>&nbsp;<\/p>\n<ul class=\"post-ul\">\u786c\u4ef6\u914d\u7f6e(Worker)\u4e3a2\u53f0ecs.sn2.large<\/ul>\n<pre class=\"post-pre\"><code><span class=\"p\">[<\/span><span class=\"n\">root<\/span><span class=\"vi\">@emr<\/span><span class=\"o\">-<\/span><span class=\"n\">header<\/span><span class=\"o\">-<\/span><span class=\"mi\">1<\/span> <span class=\"n\">bin<\/span><span class=\"p\">]<\/span><span class=\"c1\"># echo envi | nc localhost 2181<\/span>\r\n<span class=\"no\">Environment<\/span><span class=\"p\">:<\/span>\r\n<span class=\"n\">zookeeper<\/span><span 
class=\"p\">.<\/span><span class=\"nf\">version<\/span><span class=\"o\">=<\/span><span class=\"mf\">3.4<\/span><span class=\"o\">.<\/span><span class=\"mi\">13<\/span><span class=\"o\">-<\/span><span class=\"mi\">2<\/span><span class=\"n\">d71af4dbe22557fda74f9a9b4309b15a7487f03<\/span><span class=\"p\">,<\/span> <span class=\"n\">built<\/span> <span class=\"n\">on<\/span> <span class=\"mo\">06<\/span><span class=\"o\">\/<\/span><span class=\"mi\">29<\/span><span class=\"o\">\/<\/span><span class=\"mi\">2018<\/span> <span class=\"mo\">04<\/span><span class=\"p\">:<\/span><span class=\"mo\">05<\/span> <span class=\"no\">GMT<\/span>\r\n<span class=\"n\">host<\/span><span class=\"p\">.<\/span><span class=\"nf\">name<\/span><span class=\"o\">=<\/span><span class=\"n\">emr<\/span><span class=\"o\">-<\/span><span class=\"n\">header<\/span><span class=\"o\">-<\/span><span class=\"mi\">1<\/span><span class=\"p\">.<\/span><span class=\"nf\">cluster<\/span><span class=\"o\">-<\/span><span class=\"mi\">43709<\/span>\r\n<span class=\"n\">java<\/span><span class=\"p\">.<\/span><span class=\"nf\">version<\/span><span class=\"o\">=<\/span><span class=\"mf\">1.8<\/span><span class=\"o\">.<\/span><span class=\"mo\">0_151<\/span>\r\n<span class=\"n\">java<\/span><span class=\"p\">.<\/span><span class=\"nf\">vendor<\/span><span class=\"o\">=<\/span><span class=\"no\">Oracle<\/span> <span class=\"no\">Corporation<\/span>\r\n<span class=\"n\">java<\/span><span class=\"p\">.<\/span><span class=\"nf\">home<\/span><span class=\"o\">=<\/span><span class=\"sr\">\/usr\/<\/span><span class=\"n\">lib<\/span><span class=\"o\">\/<\/span><span class=\"n\">jvm<\/span><span class=\"o\">\/<\/span><span class=\"n\">java<\/span><span class=\"o\">-<\/span><span class=\"mf\">1.8<\/span><span class=\"o\">.<\/span><span class=\"mi\">0<\/span><span class=\"o\">-<\/span><span class=\"n\">openjdk<\/span><span class=\"o\">-<\/span><span class=\"mf\">1.8<\/span><span class=\"o\">.<\/span><span 
class=\"mf\">0.151<\/span><span class=\"o\">-<\/span><span class=\"mi\">1<\/span><span class=\"p\">.<\/span><span class=\"nf\">b12<\/span><span class=\"p\">.<\/span><span class=\"nf\">el7_4<\/span><span class=\"p\">.<\/span><span class=\"nf\">x86_64<\/span><span class=\"o\">\/<\/span><span class=\"n\">jre<\/span>\r\n<span class=\"n\">java<\/span><span class=\"p\">.<\/span><span class=\"nf\">class<\/span><span class=\"p\">.<\/span><span class=\"nf\">path<\/span><span class=\"o\">=<\/span><span class=\"sr\">\/usr\/<\/span><span class=\"n\">lib<\/span><span class=\"o\">\/<\/span><span class=\"n\">zookeeper<\/span><span class=\"o\">-<\/span><span class=\"n\">current<\/span><span class=\"o\">\/<\/span><span class=\"n\">bin<\/span><span class=\"o\">\/..<\/span><span class=\"sr\">\/build\/<\/span><span class=\"n\">classes<\/span><span class=\"ss\">:\/<\/span><span class=\"n\">usr<\/span><span class=\"o\">\/<\/span><span class=\"n\">lib<\/span><span class=\"o\">\/<\/span><span class=\"n\">zookeeper<\/span><span class=\"o\">-<\/span><span class=\"n\">current<\/span><span class=\"o\">\/<\/span><span class=\"n\">bin<\/span><span class=\"o\">\/..<\/span><span class=\"sr\">\/build\/<\/span><span class=\"n\">lib<\/span><span class=\"o\">\/*<\/span><span class=\"p\">.<\/span><span class=\"nf\">jar<\/span><span class=\"ss\">:\/<\/span><span class=\"n\">usr<\/span><span class=\"o\">\/<\/span><span class=\"n\">lib<\/span><span class=\"o\">\/<\/span><span class=\"n\">zookeeper<\/span><span class=\"o\">-<\/span><span class=\"n\">current<\/span><span class=\"o\">\/<\/span><span class=\"n\">bin<\/span><span class=\"o\">\/..<\/span><span class=\"sr\">\/lib\/s<\/span><span class=\"n\">lf4j<\/span><span class=\"o\">-<\/span><span class=\"n\">log4j12<\/span><span class=\"o\">-<\/span><span class=\"mf\">1.7<\/span><span class=\"o\">.<\/span><span class=\"mi\">25<\/span><span class=\"p\">.<\/span><span class=\"nf\">jar<\/span><span class=\"ss\">:\/<\/span><span class=\"n\">usr<\/span><span 
class=\"o\">\/<\/span><span class=\"n\">lib<\/span><span class=\"o\">\/<\/span><span class=\"n\">zookeeper<\/span><span class=\"o\">-<\/span><span class=\"n\">current<\/span><span class=\"o\">\/<\/span><span class=\"n\">bin<\/span><span class=\"o\">\/..<\/span><span class=\"sr\">\/lib\/s<\/span><span class=\"n\">lf4j<\/span><span class=\"o\">-<\/span><span class=\"n\">api<\/span><span class=\"o\">-<\/span><span class=\"mf\">1.7<\/span><span class=\"o\">.<\/span><span class=\"mi\">25<\/span><span class=\"p\">.<\/span><span class=\"nf\">jar<\/span><span class=\"ss\">:\/<\/span><span class=\"n\">usr<\/span><span class=\"o\">\/<\/span><span class=\"n\">lib<\/span><span class=\"o\">\/<\/span><span class=\"n\">zookeeper<\/span><span class=\"o\">-<\/span><span class=\"n\">current<\/span><span class=\"o\">\/<\/span><span class=\"n\">bin<\/span><span class=\"o\">\/..<\/span><span class=\"sr\">\/lib\/ne<\/span><span class=\"n\">tty<\/span><span class=\"o\">-<\/span><span class=\"mf\">3.10<\/span><span class=\"o\">.<\/span><span class=\"mi\">6<\/span><span class=\"o\">.<\/span><span class=\"no\">Final<\/span><span class=\"p\">.<\/span><span class=\"nf\">jar<\/span><span class=\"ss\">:\/<\/span><span class=\"n\">usr<\/span><span class=\"o\">\/<\/span><span class=\"n\">lib<\/span><span class=\"o\">\/<\/span><span class=\"n\">zookeeper<\/span><span class=\"o\">-<\/span><span class=\"n\">current<\/span><span class=\"o\">\/<\/span><span class=\"n\">bin<\/span><span class=\"o\">\/..<\/span><span class=\"sr\">\/lib\/<\/span><span class=\"n\">log4j<\/span><span class=\"o\">-<\/span><span class=\"mf\">1.2<\/span><span class=\"o\">.<\/span><span class=\"mi\">17<\/span><span class=\"p\">.<\/span><span class=\"nf\">jar<\/span><span class=\"ss\">:\/<\/span><span class=\"n\">usr<\/span><span class=\"o\">\/<\/span><span class=\"n\">lib<\/span><span class=\"o\">\/<\/span><span class=\"n\">zookeeper<\/span><span class=\"o\">-<\/span><span class=\"n\">current<\/span><span 
class=\"o\">\/<\/span><span class=\"n\">bin<\/span><span class=\"o\">\/..<\/span><span class=\"sr\">\/lib\/<\/span><span class=\"n\">jline<\/span><span class=\"o\">-<\/span><span class=\"mf\">0.9<\/span><span class=\"o\">.<\/span><span class=\"mi\">94<\/span><span class=\"p\">.<\/span><span class=\"nf\">jar<\/span><span class=\"ss\">:\/<\/span><span class=\"n\">usr<\/span><span class=\"o\">\/<\/span><span class=\"n\">lib<\/span><span class=\"o\">\/<\/span><span class=\"n\">zookeeper<\/span><span class=\"o\">-<\/span><span class=\"n\">current<\/span><span class=\"o\">\/<\/span><span class=\"n\">bin<\/span><span class=\"o\">\/..<\/span><span class=\"sr\">\/lib\/<\/span><span class=\"n\">audience<\/span><span class=\"o\">-<\/span><span class=\"n\">annotations<\/span><span class=\"o\">-<\/span><span class=\"mf\">0.5<\/span><span class=\"o\">.<\/span><span class=\"mi\">0<\/span><span class=\"p\">.<\/span><span class=\"nf\">jar<\/span><span class=\"ss\">:\/<\/span><span class=\"n\">usr<\/span><span class=\"o\">\/<\/span><span class=\"n\">lib<\/span><span class=\"o\">\/<\/span><span class=\"n\">zookeeper<\/span><span class=\"o\">-<\/span><span class=\"n\">current<\/span><span class=\"o\">\/<\/span><span class=\"n\">bin<\/span><span class=\"o\">\/..<\/span><span class=\"sr\">\/zookeeper-3.4.13.jar:\/us<\/span><span class=\"n\">r<\/span><span class=\"o\">\/<\/span><span class=\"n\">lib<\/span><span class=\"o\">\/<\/span><span class=\"n\">zookeeper<\/span><span class=\"o\">-<\/span><span class=\"n\">current<\/span><span class=\"o\">\/<\/span><span class=\"n\">bin<\/span><span class=\"o\">\/..<\/span><span class=\"sr\">\/src\/<\/span><span class=\"n\">java<\/span><span class=\"o\">\/<\/span><span class=\"n\">lib<\/span><span class=\"o\">\/*<\/span><span class=\"p\">.<\/span><span class=\"nf\">jar<\/span><span class=\"ss\">:\/<\/span><span class=\"n\">etc<\/span><span class=\"o\">\/<\/span><span class=\"n\">ecm<\/span><span class=\"o\">\/<\/span><span 
class=\"n\">zookeeper<\/span><span class=\"o\">-<\/span><span class=\"n\">conf<\/span><span class=\"o\">::\/<\/span><span class=\"n\">var<\/span><span class=\"o\">\/<\/span><span class=\"n\">lib<\/span><span class=\"o\">\/<\/span><span class=\"n\">ecm<\/span><span class=\"o\">-<\/span><span class=\"n\">agent<\/span><span class=\"o\">\/<\/span><span class=\"n\">data<\/span><span class=\"o\">\/<\/span><span class=\"n\">jmxetric<\/span><span class=\"o\">-<\/span><span class=\"mf\">1.0<\/span><span class=\"o\">.<\/span><span class=\"mi\">8<\/span><span class=\"p\">.<\/span><span class=\"nf\">jar<\/span>\r\n<span class=\"n\">java<\/span><span class=\"p\">.<\/span><span class=\"nf\">library<\/span><span class=\"p\">.<\/span><span class=\"nf\">path<\/span><span class=\"o\">=<\/span><span class=\"sr\">\/usr\/<\/span><span class=\"n\">java<\/span><span class=\"o\">\/<\/span><span class=\"n\">packages<\/span><span class=\"o\">\/<\/span><span class=\"n\">lib<\/span><span class=\"o\">\/<\/span><span class=\"n\">amd64<\/span><span class=\"ss\">:\/<\/span><span class=\"n\">usr<\/span><span class=\"o\">\/<\/span><span class=\"n\">lib64<\/span><span class=\"ss\">:\/<\/span><span class=\"n\">lib64<\/span><span class=\"ss\">:\/<\/span><span class=\"n\">lib<\/span><span class=\"ss\">:\/<\/span><span class=\"n\">usr<\/span><span class=\"o\">\/<\/span><span class=\"n\">lib<\/span>\r\n<span class=\"n\">java<\/span><span class=\"p\">.<\/span><span class=\"nf\">io<\/span><span class=\"p\">.<\/span><span class=\"nf\">tmpdir<\/span><span class=\"o\">=<\/span><span class=\"sr\">\/tmp\r\njava.compiler=&lt;NA&gt;\r\nos.name=Linux\r\nos.arch=amd64\r\nos.version=3.10.0-693.2.2.el7.x86_64\r\nuser.name=hadoop\r\nuser.home=\/<\/span><span class=\"n\">home<\/span><span class=\"o\">\/<\/span><span class=\"n\">hadoop<\/span>\r\n<span class=\"n\">user<\/span><span class=\"p\">.<\/span><span class=\"nf\">dir<\/span><span class=\"o\">=<\/span><span class=\"sr\">\/home\/<\/span><span 
class=\"n\">hadoop<\/span>\r\n<\/code><\/pre>\n<h5>Hosebird Client<\/h5>\n<p>Hosebird Client\u662f\u4e00\u4e2aJava Http\u5ba2\u6237\u7aef\uff0c\u7528\u4e8e\u4e0eKafka Producer\u914d\u5408\u4f7f\u7528\uff0c\u8c03\u7528Twitter\u7684Streaming API\u3002\u5982\u679c\u60a8\u60f3\u4e86\u89e3\u66f4\u8be6\u7ec6\u7684\u4fe1\u606f\uff0c\u8bf7\u53c2\u8003\u4e0b\u9762\u7684GitHub\u94fe\u63a5\u3002<a href=\"https:\/\/github.com\/twitter\/hbc\">https:\/\/github.com\/twitter\/hbc<\/a><\/p>\n<h4>\u6574\u4f53\u7ed3\u6784\u56fe<\/h4>\n<p>\u57fa\u4e8e\u5206\u6790\u7684\u76ee\u7684\uff0c\u6211\u4eec\u5e0c\u671b\u901a\u8fc7\u5b9e\u65f6\u63a8\u6587\u6570\u6765\u8868\u793a\u5728Twitter\u4e0a\u6307\u5b9a\u5173\u952e\u8bcd\u7684\u70ed\u5ea6\u3002\u9996\u5148\uff0c\u6784\u5efa\u7684\u963f\u91cc\u5df4\u5df4\u4e91\u7ed3\u6784\u56fe\u5982\u4e0b\u6240\u793a\u3002<\/p>\n<div><img decoding=\"async\" class=\"post-images\" title=\"\u6574\u4f53\u7ed3\u6784\u56fe\" src=\"https:\/\/cdn.silicloud.com\/blog-img\/blog\/img\/657d7b8f913a08637a699a80\/12-0.png\" alt=\"\u6574\u4f53\u7ed3\u6784\u56fe\" \/><\/div>\n<h4>Kafka\u751f\u4ea7\u8005<\/h4>\n<p>\u73b0\u5728\u51c6\u5907\u5de5\u4f5c\u5df2\u5168\u90e8\u5c31\u7eea\uff0c\u6211\u4eec\u7acb\u523b\u5f00\u59cb\u5728\u672c\u5730\u7684Java\u5f00\u53d1\u73af\u5883\u4e2d\u7f16\u5199\u4ee3\u7801\u5427\uff01\u9996\u5148\uff0c\u6839\u636e\u4ee5\u4e0b\u793a\u4f8b\u4ee3\u7801\u521b\u5efaKafka\u751f\u4ea7\u8005\uff0c\u5e76\u751f\u6210jar\u6587\u4ef6\u3002<\/p>\n<h5>\u63a8\u6587\u5173\u952e\u8bcd<\/h5>\n<p>\u6211\u6307\u5b9a\u4e86\u56db\u4e2a\u5173\u952e\u8bcd\uff1a\u201cbitcoin\u201d\u3001\u201cBlockchain\u201d\u3001\u201cIoT\u201d\u3001\u201c5G\u201d\u3002<\/p>\n<h5>Kafka\u5f15\u5bfc\u670d\u52a1\u5668<\/h5>\n<p>\u53ef\u4ee5\u4f7f\u7528 Kafka \u96c6\u7fa4\u4e2d\u4efb\u610f\u4e00\u53f0\u8282\u70b9\u7684 IP \u5730\u5740\u3002<\/p>\n<h5>Twitter Streaming API\u7684\u8ba4\u8bc1\u4fe1\u606f<\/h5>\n<p>\u4e3a\u4e86\u4f7f\u7528Twitter Streaming 
API\uff0c\u9700\u8981\u4e8b\u5148\u83b7\u53d6\u5e76\u8f93\u5165consumerKey\u3001consumerSecret\u3001token\u3001secret\u3002<\/p>\n<pre class=\"post-pre\"><code>\r\n<span class=\"kd\">public<\/span> <span class=\"kd\">class<\/span> <span class=\"nc\">ProducerTest<\/span> <span class=\"o\">{<\/span>\r\n\r\n    <span class=\"nc\">Logger<\/span> <span class=\"n\">logger<\/span> <span class=\"o\">=<\/span> <span class=\"nc\">LoggerFactory<\/span><span class=\"o\">.<\/span><span class=\"na\">getLogger<\/span><span class=\"o\">(<\/span><span class=\"nc\">ProducerTest<\/span><span class=\"o\">.<\/span><span class=\"na\">class<\/span><span class=\"o\">.<\/span><span class=\"na\">getName<\/span><span class=\"o\">());<\/span>\r\n\r\n<span class=\"err\">\u3000<\/span><span class=\"cm\">\/** ---------------------- Twitter Streaming API\u60c5\u5831 ---------------------- *\/<\/span>\r\n    <span class=\"nc\">String<\/span> <span class=\"n\">consumerKey<\/span> <span class=\"o\">=<\/span> <span class=\"s\">\"xxxxxxxxxxxxxxxx\"<\/span><span class=\"o\">;<\/span>\r\n    <span class=\"nc\">String<\/span> <span class=\"n\">consumerSecret<\/span> <span class=\"o\">=<\/span> <span class=\"s\">\"xxxxxxxxxxxxxxxx\"<\/span><span class=\"o\">;<\/span>\r\n    <span class=\"nc\">String<\/span> <span class=\"n\">token<\/span> <span class=\"o\">=<\/span> <span class=\"s\">\"xxxxxxxxxxxxxxxx\"<\/span><span class=\"o\">;<\/span>\r\n    <span class=\"nc\">String<\/span> <span class=\"n\">secret<\/span> <span class=\"o\">=<\/span> <span class=\"s\">\"xxxxxxxxxxxxxxxx\"<\/span><span class=\"o\">;<\/span>\r\n    <span class=\"nc\">String<\/span> <span class=\"n\">mytopic<\/span> <span class=\"o\">=<\/span> <span class=\"s\">\"tweets_poc\"<\/span><span class=\"o\">;<\/span>\r\n\r\n<span class=\"err\">\u3000<\/span><span class=\"cm\">\/** ---------------------- Tweets\u30ad\u30fc\u30ef\u30fc\u30c9\u3092\u6307\u5b9a ---------------------- *\/<\/span>\r\n    <span class=\"nc\">List<\/span><span 
class=\"o\">&lt;<\/span><span class=\"nc\">String<\/span><span class=\"o\">&gt;<\/span> <span class=\"n\">terms<\/span> <span class=\"o\">=<\/span> <span class=\"nc\">Lists<\/span><span class=\"o\">.<\/span><span class=\"na\">newArrayList<\/span><span class=\"o\">(<\/span><span class=\"s\">\"bitcoin\"<\/span><span class=\"o\">,<\/span><span class=\"s\">\"Blockchain\"<\/span><span class=\"o\">,<\/span><span class=\"s\">\"IoT\"<\/span><span class=\"o\">,<\/span><span class=\"s\">\"5G\"<\/span><span class=\"o\">);<\/span>\r\n\r\n\r\n    <span class=\"kd\">public<\/span> <span class=\"nf\">ProducerTest<\/span><span class=\"o\">(){}<\/span>\r\n\r\n    <span class=\"kd\">public<\/span> <span class=\"kd\">static<\/span> <span class=\"kt\">void<\/span> <span class=\"nf\">main<\/span><span class=\"o\">(<\/span><span class=\"nc\">String<\/span><span class=\"o\">[]<\/span> <span class=\"n\">args<\/span><span class=\"o\">)<\/span> <span class=\"o\">{<\/span>\r\n        <span class=\"k\">new<\/span> <span class=\"nf\">ProducerTest<\/span><span class=\"o\">().<\/span><span class=\"na\">run<\/span><span class=\"o\">();<\/span>\r\n    <span class=\"o\">}<\/span>\r\n\r\n    <span class=\"kd\">public<\/span> <span class=\"kt\">void<\/span> <span class=\"nf\">run<\/span><span class=\"o\">(){<\/span>\r\n\r\n        <span class=\"n\">logger<\/span><span class=\"o\">.<\/span><span class=\"na\">info<\/span><span class=\"o\">(<\/span><span class=\"s\">\"Setup\"<\/span><span class=\"o\">);<\/span>\r\n\r\n        <span class=\"nc\">BlockingQueue<\/span><span class=\"o\">&lt;<\/span><span class=\"nc\">String<\/span><span class=\"o\">&gt;<\/span> <span class=\"n\">msgQueue<\/span> <span class=\"o\">=<\/span> <span class=\"k\">new<\/span> <span class=\"nc\">LinkedBlockingQueue<\/span><span class=\"o\">&lt;<\/span><span class=\"nc\">String<\/span><span class=\"o\">&gt;(<\/span><span class=\"mi\">1000<\/span><span class=\"o\">);<\/span>\r\n\r\n        <span class=\"nc\">Client<\/span> <span 
class=\"n\">client<\/span> <span class=\"o\">=<\/span> <span class=\"n\">createTwitterClient<\/span><span class=\"o\">(<\/span><span class=\"n\">msgQueue<\/span><span class=\"o\">);<\/span>\r\n        <span class=\"n\">client<\/span><span class=\"o\">.<\/span><span class=\"na\">connect<\/span><span class=\"o\">();<\/span>\r\n\r\n        <span class=\"nc\">KafkaProducer<\/span><span class=\"o\">&lt;<\/span><span class=\"nc\">String<\/span><span class=\"o\">,<\/span> <span class=\"nc\">String<\/span><span class=\"o\">&gt;<\/span> <span class=\"n\">producer<\/span> <span class=\"o\">=<\/span> <span class=\"n\">createKafkaProducer<\/span><span class=\"o\">();<\/span>\r\n\r\n        <span class=\"nc\">Runtime<\/span><span class=\"o\">.<\/span><span class=\"na\">getRuntime<\/span><span class=\"o\">().<\/span><span class=\"na\">addShutdownHook<\/span><span class=\"o\">(<\/span><span class=\"k\">new<\/span> <span class=\"nc\">Thread<\/span><span class=\"o\">(()<\/span> <span class=\"o\">-&gt;<\/span> <span class=\"o\">{<\/span>\r\n            <span class=\"n\">logger<\/span><span class=\"o\">.<\/span><span class=\"na\">info<\/span><span class=\"o\">(<\/span><span class=\"s\">\"stopping application...\"<\/span><span class=\"o\">);<\/span>\r\n            <span class=\"n\">logger<\/span><span class=\"o\">.<\/span><span class=\"na\">info<\/span><span class=\"o\">(<\/span><span class=\"s\">\"shutting down client from twitter...\"<\/span><span class=\"o\">);<\/span>\r\n            <span class=\"n\">client<\/span><span class=\"o\">.<\/span><span class=\"na\">stop<\/span><span class=\"o\">();<\/span>\r\n            <span class=\"n\">logger<\/span><span class=\"o\">.<\/span><span class=\"na\">info<\/span><span class=\"o\">(<\/span><span class=\"s\">\"closing producer...\"<\/span><span class=\"o\">);<\/span>\r\n            <span class=\"n\">producer<\/span><span class=\"o\">.<\/span><span class=\"na\">close<\/span><span class=\"o\">();<\/span>\r\n            <span 
class=\"n\">logger<\/span><span class=\"o\">.<\/span><span class=\"na\">info<\/span><span class=\"o\">(<\/span><span class=\"s\">\"done!\"<\/span><span class=\"o\">);<\/span>\r\n        <span class=\"o\">}));<\/span>\r\n\r\n        <span class=\"k\">while<\/span> <span class=\"o\">(!<\/span><span class=\"n\">client<\/span><span class=\"o\">.<\/span><span class=\"na\">isDone<\/span><span class=\"o\">())<\/span> <span class=\"o\">{<\/span>\r\n            <span class=\"nc\">String<\/span> <span class=\"n\">msg<\/span> <span class=\"o\">=<\/span> <span class=\"kc\">null<\/span><span class=\"o\">;<\/span>\r\n            <span class=\"k\">try<\/span> <span class=\"o\">{<\/span>\r\n                <span class=\"n\">msg<\/span> <span class=\"o\">=<\/span> <span class=\"n\">msgQueue<\/span><span class=\"o\">.<\/span><span class=\"na\">poll<\/span><span class=\"o\">(<\/span><span class=\"mi\">5<\/span><span class=\"o\">,<\/span> <span class=\"nc\">TimeUnit<\/span><span class=\"o\">.<\/span><span class=\"na\">SECONDS<\/span><span class=\"o\">);<\/span>\r\n            <span class=\"o\">}<\/span> <span class=\"k\">catch<\/span> <span class=\"o\">(<\/span><span class=\"nc\">InterruptedException<\/span> <span class=\"n\">e<\/span><span class=\"o\">)<\/span> <span class=\"o\">{<\/span>\r\n                <span class=\"n\">e<\/span><span class=\"o\">.<\/span><span class=\"na\">printStackTrace<\/span><span class=\"o\">();<\/span>\r\n                <span class=\"n\">client<\/span><span class=\"o\">.<\/span><span class=\"na\">stop<\/span><span class=\"o\">();<\/span>\r\n            <span class=\"o\">}<\/span>\r\n\r\n            <span class=\"k\">if<\/span> <span class=\"o\">(<\/span><span class=\"n\">msg<\/span> <span class=\"o\">!=<\/span> <span class=\"kc\">null<\/span><span class=\"o\">){<\/span>\r\n                <span class=\"n\">logger<\/span><span class=\"o\">.<\/span><span class=\"na\">info<\/span><span class=\"o\">(<\/span><span class=\"n\">msg<\/span><span 
class=\"o\">);<\/span>\r\n                <span class=\"k\">if<\/span><span class=\"o\">(<\/span><span class=\"nc\">StringUtils<\/span><span class=\"o\">.<\/span><span class=\"na\">containsIgnoreCase<\/span><span class=\"o\">(<\/span><span class=\"n\">msg<\/span><span class=\"o\">,<\/span><span class=\"s\">\"Bitcoin\"<\/span><span class=\"o\">)){<\/span>\r\n                    <span class=\"n\">producer<\/span><span class=\"o\">.<\/span><span class=\"na\">send<\/span><span class=\"o\">(<\/span><span class=\"k\">new<\/span> <span class=\"nc\">ProducerRecord<\/span><span class=\"o\">&lt;&gt;(<\/span><span class=\"n\">mytopic<\/span><span class=\"o\">,<\/span> <span class=\"s\">\"Bitcoin\"<\/span><span class=\"o\">,<\/span> <span class=\"n\">msg<\/span><span class=\"o\">),<\/span> <span class=\"k\">new<\/span> <span class=\"nc\">Callback<\/span><span class=\"o\">()<\/span> <span class=\"o\">{<\/span>\r\n                        <span class=\"nd\">@Override<\/span>\r\n                        <span class=\"kd\">public<\/span> <span class=\"kt\">void<\/span> <span class=\"nf\">onCompletion<\/span><span class=\"o\">(<\/span><span class=\"nc\">RecordMetadata<\/span> <span class=\"n\">recordMetadata<\/span><span class=\"o\">,<\/span> <span class=\"nc\">Exception<\/span> <span class=\"n\">e<\/span><span class=\"o\">)<\/span> <span class=\"o\">{<\/span>\r\n                            <span class=\"k\">if<\/span> <span class=\"o\">(<\/span><span class=\"n\">e<\/span> <span class=\"o\">!=<\/span> <span class=\"kc\">null<\/span><span class=\"o\">)<\/span> <span class=\"o\">{<\/span>\r\n                                <span class=\"n\">logger<\/span><span class=\"o\">.<\/span><span class=\"na\">error<\/span><span class=\"o\">(<\/span><span class=\"s\">\"Something bad happened\"<\/span><span class=\"o\">,<\/span> <span class=\"n\">e<\/span><span class=\"o\">);<\/span>\r\n                            <span class=\"o\">}<\/span>\r\n                        <span 
class=\"o\">}<\/span>\r\n                    <span class=\"o\">});<\/span>\r\n                <span class=\"o\">}<\/span>\r\n                <span class=\"k\">else<\/span> <span class=\"nf\">if<\/span> <span class=\"o\">(<\/span><span class=\"nc\">StringUtils<\/span><span class=\"o\">.<\/span><span class=\"na\">containsIgnoreCase<\/span><span class=\"o\">(<\/span><span class=\"n\">msg<\/span><span class=\"o\">,<\/span><span class=\"s\">\"Blockchain\"<\/span><span class=\"o\">))<\/span> <span class=\"o\">{<\/span>\r\n                    <span class=\"n\">producer<\/span><span class=\"o\">.<\/span><span class=\"na\">send<\/span><span class=\"o\">(<\/span><span class=\"k\">new<\/span> <span class=\"nc\">ProducerRecord<\/span><span class=\"o\">&lt;&gt;(<\/span><span class=\"n\">mytopic<\/span><span class=\"o\">,<\/span> <span class=\"s\">\"Blockchain\"<\/span><span class=\"o\">,<\/span> <span class=\"n\">msg<\/span><span class=\"o\">),<\/span> <span class=\"k\">new<\/span> <span class=\"nc\">Callback<\/span><span class=\"o\">()<\/span> <span class=\"o\">{<\/span>\r\n                        <span class=\"nd\">@Override<\/span>\r\n                        <span class=\"kd\">public<\/span> <span class=\"kt\">void<\/span> <span class=\"nf\">onCompletion<\/span><span class=\"o\">(<\/span><span class=\"nc\">RecordMetadata<\/span> <span class=\"n\">recordMetadata<\/span><span class=\"o\">,<\/span> <span class=\"nc\">Exception<\/span> <span class=\"n\">e<\/span><span class=\"o\">)<\/span> <span class=\"o\">{<\/span>\r\n                            <span class=\"k\">if<\/span> <span class=\"o\">(<\/span><span class=\"n\">e<\/span> <span class=\"o\">!=<\/span> <span class=\"kc\">null<\/span><span class=\"o\">)<\/span> <span class=\"o\">{<\/span>\r\n                                <span class=\"n\">logger<\/span><span class=\"o\">.<\/span><span class=\"na\">error<\/span><span class=\"o\">(<\/span><span class=\"s\">\"Something bad happened\"<\/span><span class=\"o\">,<\/span> <span 
class=\"n\">e<\/span><span class=\"o\">);<\/span>\r\n                            <span class=\"o\">}<\/span>\r\n                        <span class=\"o\">}<\/span>\r\n                    <span class=\"o\">});<\/span>\r\n                <span class=\"o\">}<\/span>\r\n                <span class=\"k\">else<\/span> <span class=\"nf\">if<\/span> <span class=\"o\">(<\/span><span class=\"nc\">StringUtils<\/span><span class=\"o\">.<\/span><span class=\"na\">containsIgnoreCase<\/span><span class=\"o\">(<\/span><span class=\"n\">msg<\/span><span class=\"o\">,<\/span><span class=\"s\">\"IoT\"<\/span><span class=\"o\">))<\/span> <span class=\"o\">{<\/span>\r\n                    <span class=\"n\">producer<\/span><span class=\"o\">.<\/span><span class=\"na\">send<\/span><span class=\"o\">(<\/span><span class=\"k\">new<\/span> <span class=\"nc\">ProducerRecord<\/span><span class=\"o\">&lt;&gt;(<\/span><span class=\"n\">mytopic<\/span><span class=\"o\">,<\/span> <span class=\"s\">\"IoT\"<\/span><span class=\"o\">,<\/span> <span class=\"n\">msg<\/span><span class=\"o\">),<\/span> <span class=\"k\">new<\/span> <span class=\"nc\">Callback<\/span><span class=\"o\">()<\/span> <span class=\"o\">{<\/span>\r\n                        <span class=\"nd\">@Override<\/span>\r\n                        <span class=\"kd\">public<\/span> <span class=\"kt\">void<\/span> <span class=\"nf\">onCompletion<\/span><span class=\"o\">(<\/span><span class=\"nc\">RecordMetadata<\/span> <span class=\"n\">recordMetadata<\/span><span class=\"o\">,<\/span> <span class=\"nc\">Exception<\/span> <span class=\"n\">e<\/span><span class=\"o\">)<\/span> <span class=\"o\">{<\/span>\r\n                            <span class=\"k\">if<\/span> <span class=\"o\">(<\/span><span class=\"n\">e<\/span> <span class=\"o\">!=<\/span> <span class=\"kc\">null<\/span><span class=\"o\">)<\/span> <span class=\"o\">{<\/span>\r\n                                <span class=\"n\">logger<\/span><span class=\"o\">.<\/span><span 
class=\"na\">error<\/span><span class=\"o\">(<\/span><span class=\"s\">\"Something bad happened\"<\/span><span class=\"o\">,<\/span> <span class=\"n\">e<\/span><span class=\"o\">);<\/span>\r\n                            <span class=\"o\">}<\/span>\r\n                        <span class=\"o\">}<\/span>\r\n                    <span class=\"o\">});<\/span>\r\n                <span class=\"o\">}<\/span>\r\n                <span class=\"k\">else<\/span> <span class=\"nf\">if<\/span> <span class=\"o\">(<\/span><span class=\"nc\">StringUtils<\/span><span class=\"o\">.<\/span><span class=\"na\">containsIgnoreCase<\/span><span class=\"o\">(<\/span><span class=\"n\">msg<\/span><span class=\"o\">,<\/span><span class=\"s\">\"5G\"<\/span><span class=\"o\">))<\/span> <span class=\"o\">{<\/span>\r\n                    <span class=\"n\">producer<\/span><span class=\"o\">.<\/span><span class=\"na\">send<\/span><span class=\"o\">(<\/span><span class=\"k\">new<\/span> <span class=\"nc\">ProducerRecord<\/span><span class=\"o\">&lt;&gt;(<\/span><span class=\"n\">mytopic<\/span><span class=\"o\">,<\/span><span class=\"mi\">5<\/span><span class=\"o\">,<\/span><span class=\"s\">\"5G\"<\/span><span class=\"o\">,<\/span> <span class=\"n\">msg<\/span><span class=\"o\">));<\/span>\r\n                <span class=\"o\">}<\/span>\r\n                <span class=\"k\">else<\/span><span class=\"o\">{<\/span>\r\n                    <span class=\"n\">producer<\/span><span class=\"o\">.<\/span><span class=\"na\">send<\/span><span class=\"o\">(<\/span><span class=\"k\">new<\/span> <span class=\"nc\">ProducerRecord<\/span><span class=\"o\">&lt;&gt;(<\/span><span class=\"n\">mytopic<\/span><span class=\"o\">,<\/span> <span class=\"kc\">null<\/span><span class=\"o\">,<\/span> <span class=\"n\">msg<\/span><span class=\"o\">),<\/span> <span class=\"k\">new<\/span> <span class=\"nc\">Callback<\/span><span class=\"o\">()<\/span> <span class=\"o\">{<\/span>\r\n                        <span 
class=\"nd\">@Override<\/span>\r\n                        <span class=\"kd\">public<\/span> <span class=\"kt\">void<\/span> <span class=\"nf\">onCompletion<\/span><span class=\"o\">(<\/span><span class=\"nc\">RecordMetadata<\/span> <span class=\"n\">recordMetadata<\/span><span class=\"o\">,<\/span> <span class=\"nc\">Exception<\/span> <span class=\"n\">e<\/span><span class=\"o\">)<\/span> <span class=\"o\">{<\/span>\r\n                            <span class=\"k\">if<\/span> <span class=\"o\">(<\/span><span class=\"n\">e<\/span> <span class=\"o\">!=<\/span> <span class=\"kc\">null<\/span><span class=\"o\">)<\/span> <span class=\"o\">{<\/span>\r\n                                <span class=\"n\">logger<\/span><span class=\"o\">.<\/span><span class=\"na\">error<\/span><span class=\"o\">(<\/span><span class=\"s\">\"Something bad happened\"<\/span><span class=\"o\">,<\/span> <span class=\"n\">e<\/span><span class=\"o\">);<\/span>\r\n                            <span class=\"o\">}<\/span>\r\n                        <span class=\"o\">}<\/span>\r\n                    <span class=\"o\">});<\/span>\r\n                <span class=\"o\">}<\/span>\r\n            <span class=\"o\">}<\/span>\r\n        <span class=\"o\">}<\/span>\r\n        <span class=\"n\">logger<\/span><span class=\"o\">.<\/span><span class=\"na\">info<\/span><span class=\"o\">(<\/span><span class=\"s\">\"End of application\"<\/span><span class=\"o\">);<\/span>\r\n    <span class=\"o\">}<\/span>\r\n\r\n<span class=\"err\">\u3000<\/span><span class=\"cm\">\/** ---------------------- Hosebird Client\u3092\u4f5c\u6210 ---------------------- *\/<\/span>\r\n    <span class=\"kd\">public<\/span> <span class=\"nc\">Client<\/span> <span class=\"nf\">createTwitterClient<\/span><span class=\"o\">(<\/span><span class=\"nc\">BlockingQueue<\/span><span class=\"o\">&lt;<\/span><span class=\"nc\">String<\/span><span class=\"o\">&gt;<\/span> <span class=\"n\">msgQueue<\/span><span class=\"o\">){<\/span>\r\n\r\n        <span 
class=\"nc\">Hosts<\/span> <span class=\"n\">hosebirdHosts<\/span> <span class=\"o\">=<\/span> <span class=\"k\">new<\/span> <span class=\"nc\">HttpHosts<\/span><span class=\"o\">(<\/span><span class=\"nc\">Constants<\/span><span class=\"o\">.<\/span><span class=\"na\">STREAM_HOST<\/span><span class=\"o\">);<\/span>\r\n        <span class=\"nc\">StatusesFilterEndpoint<\/span> <span class=\"n\">hosebirdEndpoint<\/span> <span class=\"o\">=<\/span> <span class=\"k\">new<\/span> <span class=\"nc\">StatusesFilterEndpoint<\/span><span class=\"o\">();<\/span>\r\n\r\n        <span class=\"n\">hosebirdEndpoint<\/span><span class=\"o\">.<\/span><span class=\"na\">trackTerms<\/span><span class=\"o\">(<\/span><span class=\"n\">terms<\/span><span class=\"o\">);<\/span>\r\n\r\n        <span class=\"nc\">Authentication<\/span> <span class=\"n\">hosebirdAuth<\/span> <span class=\"o\">=<\/span> <span class=\"k\">new<\/span> <span class=\"nc\">OAuth1<\/span><span class=\"o\">(<\/span><span class=\"n\">consumerKey<\/span><span class=\"o\">,<\/span> <span class=\"n\">consumerSecret<\/span><span class=\"o\">,<\/span> <span class=\"n\">token<\/span><span class=\"o\">,<\/span> <span class=\"n\">secret<\/span><span class=\"o\">);<\/span>\r\n\r\n        <span class=\"nc\">ClientBuilder<\/span> <span class=\"n\">builder<\/span> <span class=\"o\">=<\/span> <span class=\"k\">new<\/span> <span class=\"nc\">ClientBuilder<\/span><span class=\"o\">()<\/span>\r\n                <span class=\"o\">.<\/span><span class=\"na\">name<\/span><span class=\"o\">(<\/span><span class=\"s\">\"Hosebird-Client-01\"<\/span><span class=\"o\">)<\/span>                              \r\n                <span class=\"o\">.<\/span><span class=\"na\">hosts<\/span><span class=\"o\">(<\/span><span class=\"n\">hosebirdHosts<\/span><span class=\"o\">)<\/span>\r\n                <span class=\"o\">.<\/span><span class=\"na\">authentication<\/span><span class=\"o\">(<\/span><span class=\"n\">hosebirdAuth<\/span><span 
class=\"o\">)<\/span>\r\n                <span class=\"o\">.<\/span><span class=\"na\">endpoint<\/span><span class=\"o\">(<\/span><span class=\"n\">hosebirdEndpoint<\/span><span class=\"o\">)<\/span>\r\n                <span class=\"o\">.<\/span><span class=\"na\">processor<\/span><span class=\"o\">(<\/span><span class=\"k\">new<\/span> <span class=\"nc\">StringDelimitedProcessor<\/span><span class=\"o\">(<\/span><span class=\"n\">msgQueue<\/span><span class=\"o\">));<\/span>\r\n\r\n        <span class=\"nc\">Client<\/span> <span class=\"n\">hosebirdClient<\/span> <span class=\"o\">=<\/span> <span class=\"n\">builder<\/span><span class=\"o\">.<\/span><span class=\"na\">build<\/span><span class=\"o\">();<\/span>\r\n        <span class=\"k\">return<\/span> <span class=\"n\">hosebirdClient<\/span><span class=\"o\">;<\/span>\r\n    <span class=\"o\">}<\/span>\r\n\r\n<span class=\"err\">\u3000<\/span> <span class=\"cm\">\/** ---------------------- Kafka producer\u3092\u4f5c\u6210 ---------------------- *\/<\/span>\r\n    <span class=\"kd\">public<\/span> <span class=\"nc\">KafkaProducer<\/span><span class=\"o\">&lt;<\/span><span class=\"nc\">String<\/span><span class=\"o\">,<\/span> <span class=\"nc\">String<\/span><span class=\"o\">&gt;<\/span> <span class=\"nf\">createKafkaProducer<\/span><span class=\"o\">(){<\/span>\r\n\r\n        <span class=\"nc\">String<\/span> <span class=\"n\">bootstrapServers<\/span> <span class=\"o\">=<\/span> <span class=\"s\">\"xxxxxxxxxxxxxxxx\"<\/span><span class=\"o\">;<\/span>\r\n\r\n        <span class=\"nc\">Properties<\/span> <span class=\"n\">properties<\/span> <span class=\"o\">=<\/span> <span class=\"k\">new<\/span> <span class=\"nc\">Properties<\/span><span class=\"o\">();<\/span>\r\n        <span class=\"n\">properties<\/span><span class=\"o\">.<\/span><span class=\"na\">setProperty<\/span><span class=\"o\">(<\/span><span class=\"nc\">ProducerConfig<\/span><span class=\"o\">.<\/span><span 
class=\"na\">BOOTSTRAP_SERVERS_CONFIG<\/span><span class=\"o\">,<\/span> <span class=\"n\">bootstrapServers<\/span><span class=\"o\">);<\/span>\r\n        <span class=\"n\">properties<\/span><span class=\"o\">.<\/span><span class=\"na\">setProperty<\/span><span class=\"o\">(<\/span><span class=\"nc\">ProducerConfig<\/span><span class=\"o\">.<\/span><span class=\"na\">KEY_SERIALIZER_CLASS_CONFIG<\/span><span class=\"o\">,<\/span> <span class=\"nc\">StringSerializer<\/span><span class=\"o\">.<\/span><span class=\"na\">class<\/span><span class=\"o\">.<\/span><span class=\"na\">getName<\/span><span class=\"o\">());<\/span>\r\n        <span class=\"n\">properties<\/span><span class=\"o\">.<\/span><span class=\"na\">setProperty<\/span><span class=\"o\">(<\/span><span class=\"nc\">ProducerConfig<\/span><span class=\"o\">.<\/span><span class=\"na\">VALUE_SERIALIZER_CLASS_CONFIG<\/span><span class=\"o\">,<\/span> <span class=\"nc\">StringSerializer<\/span><span class=\"o\">.<\/span><span class=\"na\">class<\/span><span class=\"o\">.<\/span><span class=\"na\">getName<\/span><span class=\"o\">());<\/span>\r\n\r\n        <span class=\"n\">properties<\/span><span class=\"o\">.<\/span><span class=\"na\">setProperty<\/span><span class=\"o\">(<\/span><span class=\"nc\">ProducerConfig<\/span><span class=\"o\">.<\/span><span class=\"na\">ENABLE_IDEMPOTENCE_CONFIG<\/span><span class=\"o\">,<\/span> <span class=\"s\">\"true\"<\/span><span class=\"o\">);<\/span>\r\n        <span class=\"n\">properties<\/span><span class=\"o\">.<\/span><span class=\"na\">setProperty<\/span><span class=\"o\">(<\/span><span class=\"nc\">ProducerConfig<\/span><span class=\"o\">.<\/span><span class=\"na\">ACKS_CONFIG<\/span><span class=\"o\">,<\/span> <span class=\"s\">\"all\"<\/span><span class=\"o\">);<\/span>\r\n        <span class=\"n\">properties<\/span><span class=\"o\">.<\/span><span class=\"na\">setProperty<\/span><span class=\"o\">(<\/span><span class=\"nc\">ProducerConfig<\/span><span 
class=\"o\">.<\/span><span class=\"na\">RETRIES_CONFIG<\/span><span class=\"o\">,<\/span> <span class=\"nc\">Integer<\/span><span class=\"o\">.<\/span><span class=\"na\">toString<\/span><span class=\"o\">(<\/span><span class=\"nc\">Integer<\/span><span class=\"o\">.<\/span><span class=\"na\">MAX_VALUE<\/span><span class=\"o\">));<\/span>\r\n        <span class=\"n\">properties<\/span><span class=\"o\">.<\/span><span class=\"na\">setProperty<\/span><span class=\"o\">(<\/span><span class=\"nc\">ProducerConfig<\/span><span class=\"o\">.<\/span><span class=\"na\">MAX_IN_FLIGHT_REQUESTS_PER_CONNECTION<\/span><span class=\"o\">,<\/span> <span class=\"s\">\"5\"<\/span><span class=\"o\">);<\/span> \r\n\r\n        <span class=\"n\">properties<\/span><span class=\"o\">.<\/span><span class=\"na\">setProperty<\/span><span class=\"o\">(<\/span><span class=\"nc\">ProducerConfig<\/span><span class=\"o\">.<\/span><span class=\"na\">COMPRESSION_TYPE_CONFIG<\/span><span class=\"o\">,<\/span> <span class=\"s\">\"snappy\"<\/span><span class=\"o\">);<\/span>\r\n        <span class=\"n\">properties<\/span><span class=\"o\">.<\/span><span class=\"na\">setProperty<\/span><span class=\"o\">(<\/span><span class=\"nc\">ProducerConfig<\/span><span class=\"o\">.<\/span><span class=\"na\">LINGER_MS_CONFIG<\/span><span class=\"o\">,<\/span> <span class=\"s\">\"20\"<\/span><span class=\"o\">);<\/span>\r\n        <span class=\"n\">properties<\/span><span class=\"o\">.<\/span><span class=\"na\">setProperty<\/span><span class=\"o\">(<\/span><span class=\"nc\">ProducerConfig<\/span><span class=\"o\">.<\/span><span class=\"na\">BATCH_SIZE_CONFIG<\/span><span class=\"o\">,<\/span> <span class=\"nc\">Integer<\/span><span class=\"o\">.<\/span><span class=\"na\">toString<\/span><span class=\"o\">(<\/span><span class=\"mi\">32<\/span><span class=\"o\">*<\/span><span class=\"mi\">1024<\/span><span class=\"o\">));<\/span>\r\n\r\n        <span class=\"nc\">KafkaProducer<\/span><span 
class=\"o\">&lt;<\/span><span class=\"nc\">String<\/span><span class=\"o\">,<\/span> <span class=\"nc\">String<\/span><span class=\"o\">&gt;<\/span> <span class=\"n\">producer<\/span> <span class=\"o\">=<\/span> <span class=\"k\">new<\/span> <span class=\"nc\">KafkaProducer<\/span><span class=\"o\">&lt;<\/span><span class=\"nc\">String<\/span><span class=\"o\">,<\/span> <span class=\"nc\">String<\/span><span class=\"o\">&gt;(<\/span><span class=\"n\">properties<\/span><span class=\"o\">);<\/span>\r\n        <span class=\"k\">return<\/span> <span class=\"n\">producer<\/span><span class=\"o\">;<\/span>\r\n    <span class=\"o\">}<\/span>\r\n\r\n<span class=\"o\">}<\/span>\r\n<\/code><\/pre>\n<h4>\u5b9e\u65f6\u6d41\u6d88\u8d39\u8005<\/h4>\n<p>Spark Streaming\u63d0\u4f9b\u4e86\u4ee5\u5fae\u6279\u5904\u7406\u65b9\u5f0f\u91cd\u590d\u5904\u7406\u6d41\u6570\u636e\u7684\u529f\u80fd\uff0c\u95f4\u9694\u901a\u5e38\u4e3a\u51e0\u79d2\u5230\u51e0\u5206\u949f\u3002\u7531\u4e8eE-Mapreduce\u7684Kafka Broker\u7248\u672c\u4e3a0.10.0\u6216\u66f4\u9ad8\uff0c\u56e0\u6b64\u53ef\u4ee5\u76f4\u63a5\u4ece\u6bcf\u4e2aPartition\u4e2d\u8bfb\u53d6\u6d88\u606f\uff0c\u800c\u65e0\u9700\u4f7f\u7528kafka\u63a5\u6536\u5668\u8fdb\u884c\u4e0eKafka\u7684\u96c6\u6210\u3002\u672c\u6b21\u4f7f\u7528\u7684\u95f4\u9694\u6570\u636e\u5982\u4e0b\u6240\u793a\u3002<\/p>\n<h5>DStream\u7684\u6279\u5904\u7406\u95f4\u9694\u4e3a1\u79d2\u3002<\/h5>\n<h5>\u6ed1\u52a8\u95f4\u9694\u4e3a1\u79d2\u3002<\/h5>\n<h5>\u7a97\u53e3\u5c3a\u5bf8300\u79d2\u3002<\/h5>\n<p>\u5728\u672c\u5730\u7684Java\u5f00\u53d1\u73af\u5883\u4e2d\uff0c\u53ef\u4ee5\u53c2\u8003\u4ee5\u4e0b\u793a\u4f8b\u4ee3\u7801\u521b\u5efa\u4e00\u4e2aSpark Streaming Consumer\u5e76\u751f\u6210jar\u6587\u4ef6\u3002<\/p>\n<pre class=\"post-pre\"><code>\r\n<span class=\"kd\">public<\/span> <span class=\"kd\">class<\/span> <span class=\"nc\">SparkStreamingPoC<\/span> <span class=\"o\">{<\/span>\r\n\r\n    <span class=\"kd\">private<\/span> <span class=\"kd\">static<\/span> 
<span class=\"nc\">JsonParser<\/span> <span class=\"n\">jsonParser<\/span> <span class=\"o\">=<\/span> <span class=\"k\">new<\/span> <span class=\"nc\">JsonParser<\/span><span class=\"o\">();<\/span>\r\n\r\n    <span class=\"kd\">public<\/span> <span class=\"kd\">static<\/span> <span class=\"kt\">void<\/span> <span class=\"nf\">main<\/span><span class=\"o\">(<\/span><span class=\"nc\">String<\/span><span class=\"o\">[]<\/span> <span class=\"n\">args<\/span><span class=\"o\">)<\/span> <span class=\"kd\">throws<\/span> <span class=\"nc\">InterruptedException<\/span> <span class=\"o\">{<\/span>\r\n\r\n        <span class=\"nc\">SparkConf<\/span> <span class=\"n\">sparkConf<\/span> <span class=\"o\">=<\/span> <span class=\"k\">new<\/span> <span class=\"nc\">SparkConf<\/span><span class=\"o\">().<\/span><span class=\"na\">setAppName<\/span><span class=\"o\">(<\/span><span class=\"s\">\"TweetsApp\"<\/span><span class=\"o\">);<\/span>\r\n\r\n        <span class=\"nc\">JavaStreamingContext<\/span> <span class=\"n\">streamingContext<\/span> <span class=\"o\">=<\/span> <span class=\"k\">new<\/span> <span class=\"nc\">JavaStreamingContext<\/span><span class=\"o\">(<\/span>\r\n                <span class=\"n\">sparkConf<\/span><span class=\"o\">,<\/span> <span class=\"nc\">Durations<\/span><span class=\"o\">.<\/span><span class=\"na\">seconds<\/span><span class=\"o\">(<\/span><span class=\"mi\">1<\/span><span class=\"o\">));<\/span>\r\n\r\n         <span class=\"cm\">\/** ---------------------- kafka\u30d1\u30e9\u30e1\u30fc\u30bf\u8a2d\u5b9a ---------------------- *\/<\/span>\r\n        <span class=\"nc\">Map<\/span><span class=\"o\">&lt;<\/span><span class=\"nc\">String<\/span><span class=\"o\">,<\/span> <span class=\"nc\">Object<\/span><span class=\"o\">&gt;<\/span> <span class=\"n\">kafkaParams<\/span> <span class=\"o\">=<\/span> <span class=\"k\">new<\/span> <span class=\"nc\">HashMap<\/span><span class=\"o\">&lt;&gt;();<\/span>\r\n        <span 
class=\"n\">kafkaParams<\/span><span class=\"o\">.<\/span><span class=\"na\">put<\/span><span class=\"o\">(<\/span><span class=\"s\">\"bootstrap.servers\"<\/span><span class=\"o\">,<\/span> <span class=\"s\">\"xxxxxxxxxxxxxxxx\"<\/span><span class=\"o\">);<\/span>\r\n        <span class=\"n\">kafkaParams<\/span><span class=\"o\">.<\/span><span class=\"na\">put<\/span><span class=\"o\">(<\/span><span class=\"s\">\"key.deserializer\"<\/span><span class=\"o\">,<\/span> <span class=\"nc\">StringDeserializer<\/span><span class=\"o\">.<\/span><span class=\"na\">class<\/span><span class=\"o\">);<\/span>\r\n        <span class=\"n\">kafkaParams<\/span><span class=\"o\">.<\/span><span class=\"na\">put<\/span><span class=\"o\">(<\/span><span class=\"s\">\"value.deserializer\"<\/span><span class=\"o\">,<\/span> <span class=\"nc\">StringDeserializer<\/span><span class=\"o\">.<\/span><span class=\"na\">class<\/span><span class=\"o\">);<\/span>\r\n        <span class=\"n\">kafkaParams<\/span><span class=\"o\">.<\/span><span class=\"na\">put<\/span><span class=\"o\">(<\/span><span class=\"s\">\"group.id\"<\/span><span class=\"o\">,<\/span> <span class=\"s\">\"tweets_group\"<\/span><span class=\"o\">);<\/span>\r\n        <span class=\"n\">kafkaParams<\/span><span class=\"o\">.<\/span><span class=\"na\">put<\/span><span class=\"o\">(<\/span><span class=\"s\">\"auto.offset.reset\"<\/span><span class=\"o\">,<\/span> <span class=\"s\">\"latest\"<\/span><span class=\"o\">);<\/span>\r\n        <span class=\"n\">kafkaParams<\/span><span class=\"o\">.<\/span><span class=\"na\">put<\/span><span class=\"o\">(<\/span><span class=\"s\">\"enable.auto.commit\"<\/span><span class=\"o\">,<\/span> <span class=\"kc\">false<\/span><span class=\"o\">);<\/span>\r\n\r\n        <span class=\"nc\">Collection<\/span><span class=\"o\">&lt;<\/span><span class=\"nc\">String<\/span><span class=\"o\">&gt;<\/span> <span class=\"n\">topics<\/span> <span class=\"o\">=<\/span> <span 
class=\"nc\">Arrays<\/span><span class=\"o\">.<\/span><span class=\"na\">asList<\/span><span class=\"o\">(<\/span><span class=\"s\">\"tweets_poc\"<\/span><span class=\"o\">);<\/span>\r\n\r\n\r\n        <span class=\"cm\">\/** ---------------------- Spark Stream\u4f5c\u6210 ---------------------- *\/<\/span>\r\n       <span class=\"nc\">JavaInputDStream<\/span><span class=\"o\">&lt;<\/span><span class=\"nc\">ConsumerRecord<\/span><span class=\"o\">&lt;<\/span><span class=\"nc\">String<\/span><span class=\"o\">,<\/span> <span class=\"nc\">String<\/span><span class=\"o\">&gt;&gt;<\/span> <span class=\"n\">stream<\/span> <span class=\"o\">=<\/span>\r\n                <span class=\"nc\">KafkaUtils<\/span><span class=\"o\">.<\/span><span class=\"na\">createDirectStream<\/span><span class=\"o\">(<\/span>\r\n                        <span class=\"n\">streamingContext<\/span><span class=\"o\">,<\/span>\r\n                        <span class=\"nc\">LocationStrategies<\/span><span class=\"o\">.<\/span><span class=\"na\">PreferConsistent<\/span><span class=\"o\">(),<\/span>\r\n                        <span class=\"nc\">ConsumerStrategies<\/span><span class=\"o\">.&lt;<\/span><span class=\"nc\">String<\/span><span class=\"o\">,<\/span> <span class=\"nc\">String<\/span><span class=\"o\">&gt;<\/span> <span class=\"nf\">Subscribe<\/span><span class=\"o\">(<\/span><span class=\"n\">topics<\/span><span class=\"o\">,<\/span> <span class=\"n\">kafkaParams<\/span><span class=\"o\">));<\/span>\r\n\r\n        <span class=\"nc\">JavaPairDStream<\/span><span class=\"o\">&lt;<\/span><span class=\"nc\">String<\/span><span class=\"o\">,<\/span><span class=\"nc\">Integer<\/span><span class=\"o\">&gt;<\/span> <span class=\"n\">s1<\/span> <span class=\"o\">=<\/span> <span class=\"n\">stream<\/span><span class=\"o\">.<\/span><span class=\"na\">mapToPair<\/span><span class=\"o\">(<\/span><span class=\"n\">record<\/span> <span class=\"o\">-&gt;<\/span> <span class=\"k\">new<\/span> <span 
class=\"nc\">Tuple2<\/span><span class=\"o\">&lt;&gt;(<\/span><span class=\"n\">record<\/span><span class=\"o\">.<\/span><span class=\"na\">key<\/span><span class=\"o\">(),<\/span><span class=\"mi\">1<\/span><span class=\"o\">));<\/span>\r\n        <span class=\"nc\">JavaPairDStream<\/span><span class=\"o\">&lt;<\/span><span class=\"nc\">String<\/span><span class=\"o\">,<\/span><span class=\"nc\">Integer<\/span><span class=\"o\">&gt;<\/span> <span class=\"n\">s2<\/span> <span class=\"o\">=<\/span> <span class=\"n\">s1<\/span><span class=\"o\">.<\/span><span class=\"na\">reduceByKeyAndWindow<\/span><span class=\"o\">((<\/span><span class=\"n\">a<\/span><span class=\"o\">,<\/span><span class=\"n\">b<\/span><span class=\"o\">)<\/span> <span class=\"o\">-&gt;<\/span> <span class=\"n\">a<\/span> <span class=\"o\">+<\/span> <span class=\"n\">b<\/span><span class=\"o\">,<\/span><span class=\"nc\">Durations<\/span><span class=\"o\">.<\/span><span class=\"na\">seconds<\/span><span class=\"o\">(<\/span><span class=\"mi\">300<\/span><span class=\"o\">),<\/span><span class=\"nc\">Durations<\/span><span class=\"o\">.<\/span><span class=\"na\">seconds<\/span><span class=\"o\">(<\/span><span class=\"mi\">1<\/span><span class=\"o\">));<\/span>\r\n        <span class=\"nc\">JavaPairDStream<\/span><span class=\"o\">&lt;<\/span><span class=\"nc\">Integer<\/span><span class=\"o\">,<\/span><span class=\"nc\">String<\/span><span class=\"o\">&gt;<\/span> <span class=\"n\">s3<\/span> <span class=\"o\">=<\/span> <span class=\"n\">s2<\/span><span class=\"o\">.<\/span><span class=\"na\">mapToPair<\/span><span class=\"o\">(<\/span><span class=\"nl\">Tuple2:<\/span><span class=\"o\">:<\/span><span class=\"n\">swap<\/span><span class=\"o\">);<\/span>\r\n        <span class=\"nc\">JavaPairDStream<\/span><span class=\"o\">&lt;<\/span><span class=\"nc\">Integer<\/span><span class=\"o\">,<\/span><span class=\"nc\">String<\/span><span class=\"o\">&gt;<\/span> <span class=\"n\">s4<\/span> <span 
class=\"o\">=<\/span> <span class=\"n\">s3<\/span><span class=\"o\">.<\/span><span class=\"na\">transformToPair<\/span><span class=\"o\">(<\/span><span class=\"n\">rdd<\/span> <span class=\"o\">-&gt;<\/span> <span class=\"n\">rdd<\/span><span class=\"o\">.<\/span><span class=\"na\">sortByKey<\/span><span class=\"o\">(<\/span><span class=\"kc\">false<\/span><span class=\"o\">));<\/span>\r\n        <span class=\"n\">s4<\/span><span class=\"o\">.<\/span><span class=\"na\">print<\/span><span class=\"o\">();<\/span>\r\n\r\n        <span class=\"n\">streamingContext<\/span><span class=\"o\">.<\/span><span class=\"na\">start<\/span><span class=\"o\">();<\/span>\r\n        <span class=\"n\">streamingContext<\/span><span class=\"o\">.<\/span><span class=\"na\">awaitTermination<\/span><span class=\"o\">();<\/span>\r\n\r\n    <span class=\"o\">}<\/span>\r\n<span class=\"o\">}<\/span>\r\n\r\n<\/code><\/pre>\n<h4>\u8fd0\u884cSpark\u5e94\u7528\u7a0b\u5e8f<\/h4>\n<p>\u9996\u5148\uff0c\u5c06\u521b\u5efa\u7684Kafka Producer\u548cSpark Streaming Consumer\u7684Jar\u6587\u4ef6\u4e0a\u4f20\u5230\u963f\u91cc\u4e91OSS\u4e0a\u3002\u7136\u540e\uff0c\u5728ECS\u4e0a\u901a\u8fc7ssh\u767b\u5f55\uff0c\u5e76\u4f7f\u7528ossutil\u7b49\u5de5\u5177\u5c06Kafka Producer\u7684jar\u6587\u4ef6\uff08TweetsProducerTest-1.0-jar-with-dependencies.jar\uff09\u4e0b\u8f7d\u5230ECS\u3002\u4e0b\u8f7d\u5b8c\u6210\u540e\uff0c\u4f7f\u7528\u4e0b\u9762\u7684\u547d\u4ee4\u542f\u52a8Kafka Producer\uff0c\u6536\u96c6\u53d1\u5e03\u5230Twitter\u4e0a\u7684\u6d88\u606f\u3002<\/p>\n<pre class=\"post-pre\"><code>java -jar TweetsProducerTest-1.0-jar-with-dependencies.jar\r\n<\/code><\/pre>\n<p>\u60a8\u4e5f\u53ef\u4ee5\u5b9e\u65f6\u4ece E-MapReduce \u63a7\u5236\u53f0\u4e2d\u786e\u8ba4\u5411 Kafka \u5199\u5165\u7684 Topic \u7684 Partition\u3001Offset\u3001ISR \u7b49\u4fe1\u606f\u3002<\/p>\n<div><img decoding=\"async\" class=\"post-images\" title=\"f:id:sbc_kou:20190703171507p:plain\" 
src=\"https:\/\/cdn.silicloud.com\/blog-img\/blog\/img\/657d7b8f913a08637a699a80\/33-1.png\" alt=\"f:id:sbc_kou:20190703171507p:plain\" \/><\/div>\n<h5>master\u53c2\u6570<\/h5>\n<p>\u7531\u4e8eE-MapReduce\u4f7f\u7528Yarn\u6a21\u5f0f\uff0c\u56e0\u6b64\u8bf7\u5c06\u4e0b\u56fe\u4e2d\u7ea2\u6846\u4e2d\u7684CLI\u53c2\u6570\uff08master\uff09\u6307\u5b9a\u4e3aYarn\u3002<\/p>\n<h5>OSS\u534f\u8bae<\/h5>\n<p>\u4eceOSS\u4e0b\u8f7djar\u6587\u4ef6\u5e76\u6267\u884c\u3002<\/p>\n<div><img decoding=\"async\" class=\"post-images\" title=\"f:id:sbc_kou:20190703164615p:plain\" src=\"https:\/\/cdn.silicloud.com\/blog-img\/blog\/img\/657d7b8f913a08637a699a80\/38-0.png\" alt=\"f:id:sbc_kou:20190703164615p:plain\" \/><\/div>\n<p>\u53ef\u4ee5\u786e\u8ba4ETL\u5904\u7406\u7684\u7ed3\u679c\u5982\u4e0b\u56fe\u6240\u793a\u3002<\/p>\n<div><img decoding=\"async\" class=\"post-images\" title=\"f:id:sbc_kou:20190703172435p:plain\" src=\"https:\/\/cdn.silicloud.com\/blog-img\/blog\/img\/657d7b8f913a08637a699a80\/40-0.png\" alt=\"f:id:sbc_kou:20190703172435p:plain\" \/><\/div>\n<h4>\u6700\u540e<\/h4>\n<p>\u8fd9\u4e2a\u4e8b\u60c5\u4f60\u89c9\u5f97\u600e\u4e48\u6837\u5462\uff1f
<\/p>\n<p>\u5728\u666e\u901a\u60c5\u51b5\u4e0b\uff0c\u53ef\u4ee5\u901a\u8fc7ssh\u7b49\u65b9\u5f0f\u767b\u5f55\u5230\u4e3b\u8282\u70b9\u6216\u8005\u8df3\u677f\u670d\u52a1\u5668\uff0c\u5728\u547d\u4ee4\u884c\u4e2d\u6267\u884c\u4f5c\u4e1a\u3002\u4f46\u662f\u5728\u672c\u6b21\u8fd9\u6837\u7684\u60c5\u51b5\u4e0b\uff0c\u53ef\u4ee5\u901a\u8fc7E-MapReduce\u63a7\u5236\u53f0\u7684\u6570\u636e\u5e73\u53f0\uff0c\u4f7f\u7528\u56fe\u5f62\u754c\u9762\u6765\u64cd\u4f5c\u4f5c\u4e1a\u7684\u6267\u884c\u3002\u6b64\u5916\uff0c\u53ef\u4ee5\u7acb\u5373\u53cd\u6620\u4f5c\u4e1a\u7ed3\u679c\u7684\u7279\u70b9\u4e5f\u5bf9\u4e8e\u4f5c\u4e1a\u7684\u6545\u969c\u6392\u9664\u975e\u5e38\u6709\u7528\u3002<\/p>\n<p>\u6211\u4eec\u5728E-MapReduce\u4e0a\u6784\u5efa\u4e86\u4e00\u4e2a\u4f7f\u7528Spark Streaming\u548cKafka\u7684\u6d41\u6570\u636e\u5904\u7406\u7cfb\u7edf\uff0c\u5e76\u4ecb\u7ecd\u4e86\u5176\u9a8c\u8bc1\u7ed3\u679c\u3002\u5b9e\u9645\u4e0a\uff0c\u9664\u4e86\u6211\u4eec\u4ecb\u7ecd\u7684Apache Spark\u4f5c\u4e3a\u901a\u7528\u7684\u5b9e\u65f6\u6570\u636e\u5904\u7406\u5e73\u53f0\u4e4b\u5916\uff0c\u60a8\u8fd8\u53ef\u4ee5\u9009\u62e9\u4f7f\u7528Apache Flink\u3002\u6211\u4eec\u5efa\u8bae\u5df2\u7ecf\u719f\u6089Flink\u7684\u5f00\u53d1\u4eba\u5458\u53ef\u4ee5\u53c2\u8003\u963f\u91cc\u4e91\u7684\u6258\u7ba1\u670d\u52a1Realtime Compute\u3002<br 
\/>\n\u6b64\u5916\uff0c\u9664\u4e86Twitter\u6d88\u606f\u5916\uff0c\u5982\u679c\u60a8\u60f3\u8981\u5b9e\u73b0\u5bf9\u65e5\u5e38\u5404\u79cd\u6d41\u91cf\u65e5\u5fd7\u3001\u90ae\u4ef6\u7b49\u6570\u636e\u7684\u5b9e\u65f6\u5206\u6790\u548c\u76d1\u63a7\uff0c\u4e5f\u53ef\u4ee5\u8003\u8651\u4f7f\u7528\u8be5\u7cfb\u7edf\u3002\u8bf7\u52a1\u5fc5\u5c1d\u8bd5\u4f7f\u7528\u4e00\u4e0b\uff01<\/p>\n","protected":false},"excerpt":{"rendered":"<p>\u5728\u8fdb\u884cWeb\u8bbf\u95ee\u5206\u6790\u3001\u5b9e\u65f6\u65e5\u5fd7\u76d1\u63a7\u548c\u5f02\u5e38\u68c0\u6d4b\u3001\u793e\u4ea4\u5a92\u4f53\u5206\u6790\u7b49\u5de5\u4f5c\u65f6\uff0c\u4eba\u4eec\u5e38\u5e38\u4f1a\u4f7f\u7528\u7ec4\u5408\u4e86\u57fa\u4e8e\u5f00\u6e90\u7684\u5206\u5e03\u5f0f\u6d41\u5904\u7406 [&hellip;]<\/p>\n","protected":false},"author":6,"featured_media":0,"comment_status":"closed","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[1],"tags":[],"class_list":["post-46898","post","type-post","status-publish","format-standard","hentry","category-uncategorized"],"yoast_head":"<!-- This site is optimized with the Yoast SEO Premium plugin v21.5 (Yoast SEO v21.5) - https:\/\/yoast.com\/wordpress\/plugins\/seo\/ -->\n<title>\u6709\u5173Kafka\u548cSpark Streaming\u7684\u96c6\u6210 - Blog - Silicon Cloud<\/title>\n<meta name=\"robots\" content=\"index, follow, max-snippet:-1, max-image-preview:large, max-video-preview:-1\" \/>\n<link rel=\"canonical\" href=\"https:\/\/www.silicloud.com\/zh\/blog\/\u6709\u5173kafka\u548cspark-streaming\u7684\u96c6\u6210\u3002\/\" \/>\n<meta property=\"og:locale\" content=\"zh_CN\" \/>\n<meta property=\"og:type\" content=\"article\" \/>\n<meta property=\"og:title\" content=\"\u6709\u5173Kafka\u548cSpark Streaming\u7684\u96c6\u6210\" \/>\n<meta property=\"og:description\" 
content=\"\u5728\u8fdb\u884cWeb\u8bbf\u95ee\u5206\u6790\u3001\u5b9e\u65f6\u65e5\u5fd7\u76d1\u63a7\u548c\u5f02\u5e38\u68c0\u6d4b\u3001\u793e\u4ea4\u5a92\u4f53\u5206\u6790\u7b49\u5de5\u4f5c\u65f6\uff0c\u4eba\u4eec\u5e38\u5e38\u4f1a\u4f7f\u7528\u7ec4\u5408\u4e86\u57fa\u4e8e\u5f00\u6e90\u7684\u5206\u5e03\u5f0f\u6d41\u5904\u7406 [&hellip;]\" \/>\n<meta property=\"og:url\" content=\"https:\/\/www.silicloud.com\/zh\/blog\/\u6709\u5173kafka\u548cspark-streaming\u7684\u96c6\u6210\u3002\/\" \/>\n<meta property=\"og:site_name\" content=\"Blog - Silicon Cloud\" \/>\n<meta property=\"article:published_time\" content=\"2023-11-02T12:52:16+00:00\" \/>\n<meta property=\"article:modified_time\" content=\"2024-04-30T10:10:09+00:00\" \/>\n<meta property=\"og:image\" content=\"https:\/\/cdn.silicloud.com\/blog-img\/blog\/img\/657d7b8f913a08637a699a80\/12-0.png\" \/>\n<meta name=\"author\" content=\"\u6587, \u7fd4\" \/>\n<meta name=\"twitter:card\" content=\"summary_large_image\" \/>\n<meta name=\"twitter:label1\" content=\"\u4f5c\u8005\" \/>\n\t<meta name=\"twitter:data1\" content=\"\u6587, \u7fd4\" \/>\n\t<meta name=\"twitter:label2\" content=\"\u9884\u8ba1\u9605\u8bfb\u65f6\u95f4\" \/>\n\t<meta name=\"twitter:data2\" content=\"5 \u5206\" \/>\n<script type=\"application\/ld+json\" class=\"yoast-schema-graph\">{\"@context\":\"https:\/\/schema.org\",\"@graph\":[{\"@type\":\"WebPage\",\"@id\":\"https:\/\/www.silicloud.com\/zh\/blog\/%e6%9c%89%e5%85%b3kafka%e5%92%8cspark-streaming%e7%9a%84%e9%9b%86%e6%88%90%e3%80%82\/\",\"url\":\"https:\/\/www.silicloud.com\/zh\/blog\/%e6%9c%89%e5%85%b3kafka%e5%92%8cspark-streaming%e7%9a%84%e9%9b%86%e6%88%90%e3%80%82\/\",\"name\":\"\u6709\u5173Kafka\u548cSpark Streaming\u7684\u96c6\u6210 - Blog - Silicon 
Cloud\",\"isPartOf\":{\"@id\":\"https:\/\/www.silicloud.com\/zh\/blog\/#website\"},\"datePublished\":\"2023-11-02T12:52:16+00:00\",\"dateModified\":\"2024-04-30T10:10:09+00:00\",\"author\":{\"@id\":\"https:\/\/www.silicloud.com\/zh\/blog\/#\/schema\/person\/64d5cc7727fffbff2f9a2a8da1de3e5c\"},\"breadcrumb\":{\"@id\":\"https:\/\/www.silicloud.com\/zh\/blog\/%e6%9c%89%e5%85%b3kafka%e5%92%8cspark-streaming%e7%9a%84%e9%9b%86%e6%88%90%e3%80%82\/#breadcrumb\"},\"inLanguage\":\"zh-Hans\",\"potentialAction\":[{\"@type\":\"ReadAction\",\"target\":[\"https:\/\/www.silicloud.com\/zh\/blog\/%e6%9c%89%e5%85%b3kafka%e5%92%8cspark-streaming%e7%9a%84%e9%9b%86%e6%88%90%e3%80%82\/\"]}]},{\"@type\":\"BreadcrumbList\",\"@id\":\"https:\/\/www.silicloud.com\/zh\/blog\/%e6%9c%89%e5%85%b3kafka%e5%92%8cspark-streaming%e7%9a%84%e9%9b%86%e6%88%90%e3%80%82\/#breadcrumb\",\"itemListElement\":[{\"@type\":\"ListItem\",\"position\":1,\"name\":\"\u9996\u9875\",\"item\":\"https:\/\/www.silicloud.com\/zh\/blog\/\"},{\"@type\":\"ListItem\",\"position\":2,\"name\":\"\u6709\u5173Kafka\u548cSpark Streaming\u7684\u96c6\u6210\"}]},{\"@type\":\"WebSite\",\"@id\":\"https:\/\/www.silicloud.com\/zh\/blog\/#website\",\"url\":\"https:\/\/www.silicloud.com\/zh\/blog\/\",\"name\":\"Blog - Silicon Cloud\",\"description\":\"\",\"inLanguage\":\"zh-Hans\"},{\"@type\":\"Person\",\"@id\":\"https:\/\/www.silicloud.com\/zh\/blog\/#\/schema\/person\/64d5cc7727fffbff2f9a2a8da1de3e5c\",\"name\":\"\u6587, \u7fd4\",\"image\":{\"@type\":\"ImageObject\",\"inLanguage\":\"zh-Hans\",\"@id\":\"https:\/\/www.silicloud.com\/zh\/blog\/#\/schema\/person\/image\/\",\"url\":\"https:\/\/secure.gravatar.com\/avatar\/920c3d673e0bccacc98e5e6b7149bb3c22edd8d39cb753e5d7d7e471498118a1?s=96&d=mm&r=g\",\"contentUrl\":\"https:\/\/secure.gravatar.com\/avatar\/920c3d673e0bccacc98e5e6b7149bb3c22edd8d39cb753e5d7d7e471498118a1?s=96&d=mm&r=g\",\"caption\":\"\u6587, 
\u7fd4\"},\"url\":\"https:\/\/www.silicloud.com\/zh\/blog\/author\/wenxiang\/\"},{\"@type\":\"ImageObject\",\"inLanguage\":\"zh-Hans\",\"@id\":\"https:\/\/www.silicloud.com\/zh\/blog\/%e6%9c%89%e5%85%b3kafka%e5%92%8cspark-streaming%e7%9a%84%e9%9b%86%e6%88%90%e3%80%82\/#local-main-organization-logo\",\"url\":\"\",\"contentUrl\":\"\",\"caption\":\"Blog - Silicon Cloud\"}]}<\/script>\n<!-- \/ Yoast SEO Premium plugin. -->","yoast_head_json":{"title":"\u6709\u5173Kafka\u548cSpark Streaming\u7684\u96c6\u6210 - Blog - Silicon Cloud","robots":{"index":"index","follow":"follow","max-snippet":"max-snippet:-1","max-image-preview":"max-image-preview:large","max-video-preview":"max-video-preview:-1"},"canonical":"https:\/\/www.silicloud.com\/zh\/blog\/\u6709\u5173kafka\u548cspark-streaming\u7684\u96c6\u6210\u3002\/","og_locale":"zh_CN","og_type":"article","og_title":"\u6709\u5173Kafka\u548cSpark Streaming\u7684\u96c6\u6210","og_description":"\u5728\u8fdb\u884cWeb\u8bbf\u95ee\u5206\u6790\u3001\u5b9e\u65f6\u65e5\u5fd7\u76d1\u63a7\u548c\u5f02\u5e38\u68c0\u6d4b\u3001\u793e\u4ea4\u5a92\u4f53\u5206\u6790\u7b49\u5de5\u4f5c\u65f6\uff0c\u4eba\u4eec\u5e38\u5e38\u4f1a\u4f7f\u7528\u7ec4\u5408\u4e86\u57fa\u4e8e\u5f00\u6e90\u7684\u5206\u5e03\u5f0f\u6d41\u5904\u7406 [&hellip;]","og_url":"https:\/\/www.silicloud.com\/zh\/blog\/\u6709\u5173kafka\u548cspark-streaming\u7684\u96c6\u6210\u3002\/","og_site_name":"Blog - Silicon Cloud","article_published_time":"2023-11-02T12:52:16+00:00","article_modified_time":"2024-04-30T10:10:09+00:00","og_image":[{"url":"https:\/\/cdn.silicloud.com\/blog-img\/blog\/img\/657d7b8f913a08637a699a80\/12-0.png"}],"author":"\u6587, \u7fd4","twitter_card":"summary_large_image","twitter_misc":{"\u4f5c\u8005":"\u6587, \u7fd4","\u9884\u8ba1\u9605\u8bfb\u65f6\u95f4":"5 
\u5206"},"schema":{"@context":"https:\/\/schema.org","@graph":[{"@type":"WebPage","@id":"https:\/\/www.silicloud.com\/zh\/blog\/%e6%9c%89%e5%85%b3kafka%e5%92%8cspark-streaming%e7%9a%84%e9%9b%86%e6%88%90%e3%80%82\/","url":"https:\/\/www.silicloud.com\/zh\/blog\/%e6%9c%89%e5%85%b3kafka%e5%92%8cspark-streaming%e7%9a%84%e9%9b%86%e6%88%90%e3%80%82\/","name":"\u6709\u5173Kafka\u548cSpark Streaming\u7684\u96c6\u6210 - Blog - Silicon Cloud","isPartOf":{"@id":"https:\/\/www.silicloud.com\/zh\/blog\/#website"},"datePublished":"2023-11-02T12:52:16+00:00","dateModified":"2024-04-30T10:10:09+00:00","author":{"@id":"https:\/\/www.silicloud.com\/zh\/blog\/#\/schema\/person\/64d5cc7727fffbff2f9a2a8da1de3e5c"},"breadcrumb":{"@id":"https:\/\/www.silicloud.com\/zh\/blog\/%e6%9c%89%e5%85%b3kafka%e5%92%8cspark-streaming%e7%9a%84%e9%9b%86%e6%88%90%e3%80%82\/#breadcrumb"},"inLanguage":"zh-Hans","potentialAction":[{"@type":"ReadAction","target":["https:\/\/www.silicloud.com\/zh\/blog\/%e6%9c%89%e5%85%b3kafka%e5%92%8cspark-streaming%e7%9a%84%e9%9b%86%e6%88%90%e3%80%82\/"]}]},{"@type":"BreadcrumbList","@id":"https:\/\/www.silicloud.com\/zh\/blog\/%e6%9c%89%e5%85%b3kafka%e5%92%8cspark-streaming%e7%9a%84%e9%9b%86%e6%88%90%e3%80%82\/#breadcrumb","itemListElement":[{"@type":"ListItem","position":1,"name":"\u9996\u9875","item":"https:\/\/www.silicloud.com\/zh\/blog\/"},{"@type":"ListItem","position":2,"name":"\u6709\u5173Kafka\u548cSpark Streaming\u7684\u96c6\u6210"}]},{"@type":"WebSite","@id":"https:\/\/www.silicloud.com\/zh\/blog\/#website","url":"https:\/\/www.silicloud.com\/zh\/blog\/","name":"Blog - Silicon Cloud","description":"","inLanguage":"zh-Hans"},{"@type":"Person","@id":"https:\/\/www.silicloud.com\/zh\/blog\/#\/schema\/person\/64d5cc7727fffbff2f9a2a8da1de3e5c","name":"\u6587, 
\u7fd4","image":{"@type":"ImageObject","inLanguage":"zh-Hans","@id":"https:\/\/www.silicloud.com\/zh\/blog\/#\/schema\/person\/image\/","url":"https:\/\/secure.gravatar.com\/avatar\/920c3d673e0bccacc98e5e6b7149bb3c22edd8d39cb753e5d7d7e471498118a1?s=96&d=mm&r=g","contentUrl":"https:\/\/secure.gravatar.com\/avatar\/920c3d673e0bccacc98e5e6b7149bb3c22edd8d39cb753e5d7d7e471498118a1?s=96&d=mm&r=g","caption":"\u6587, \u7fd4"},"url":"https:\/\/www.silicloud.com\/zh\/blog\/author\/wenxiang\/"},{"@type":"ImageObject","inLanguage":"zh-Hans","@id":"https:\/\/www.silicloud.com\/zh\/blog\/%e6%9c%89%e5%85%b3kafka%e5%92%8cspark-streaming%e7%9a%84%e9%9b%86%e6%88%90%e3%80%82\/#local-main-organization-logo","url":"","contentUrl":"","caption":"Blog - Silicon Cloud"}]}},"_links":{"self":[{"href":"https:\/\/www.silicloud.com\/zh\/blog\/wp-json\/wp\/v2\/posts\/46898","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/www.silicloud.com\/zh\/blog\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/www.silicloud.com\/zh\/blog\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/www.silicloud.com\/zh\/blog\/wp-json\/wp\/v2\/users\/6"}],"replies":[{"embeddable":true,"href":"https:\/\/www.silicloud.com\/zh\/blog\/wp-json\/wp\/v2\/comments?post=46898"}],"version-history":[{"count":2,"href":"https:\/\/www.silicloud.com\/zh\/blog\/wp-json\/wp\/v2\/posts\/46898\/revisions"}],"predecessor-version":[{"id":93472,"href":"https:\/\/www.silicloud.com\/zh\/blog\/wp-json\/wp\/v2\/posts\/46898\/revisions\/93472"}],"wp:attachment":[{"href":"https:\/\/www.silicloud.com\/zh\/blog\/wp-json\/wp\/v2\/media?parent=46898"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/www.silicloud.com\/zh\/blog\/wp-json\/wp\/v2\/categories?post=46898"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/www.silicloud.com\/zh\/blog\/wp-json\/wp\/v2\/tags?post=46898"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}