代码仓库
会同步代码到 GitHub
https://github.com/turbo-duck/flink-demo
pom.xml
修改 pom.xml,需要加入 Kafka 相关的依赖包和连接器(connector)。
<?xml version="1.0" encoding="UTF-8"?>
<!-- Maven build for the Flink + Kafka word-count demo. -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>org.example</groupId>
    <artifactId>flink-demo-01</artifactId>
    <version>1.0-SNAPSHOT</version>

    <properties>
        <maven.compiler.source>8</maven.compiler.source>
        <maven.compiler.target>8</maven.compiler.target>
        <flink.version>1.13.2</flink.version>
        <!-- Scala suffix shared by every Scala-dependent Flink artifact below. -->
        <scala.binary.version>2.12</scala.binary.version>
    </properties>

    <dependencies>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-java</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-streaming-java_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-clients_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <!-- Kafka connector: use the shared Scala suffix instead of a hard-coded
             _2.11 so it cannot drift from the other Flink artifacts. -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-connector-kafka_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <!-- NOTE(review): this pins kafka-clients explicitly and therefore overrides
             the version the Flink connector would bring in transitively; confirm
             3.0.0 is compatible with Flink ${flink.version}. -->
        <dependency>
            <groupId>org.apache.kafka</groupId>
            <artifactId>kafka-clients</artifactId>
            <version>3.0.0</version>
        </dependency>
    </dependencies>
</project>
编写代码
设置 Kafka 配置
// Kafka client configuration that is handed to the Flink Kafka consumer.
Properties properties = new Properties();
// Address of the Kafka broker used for the initial connection.
// NOTE(review): 0.0.0.0 is normally a wildcard bind address, not a destination;
// replace it with the broker's real host (e.g. localhost) - confirm for your setup.
properties.setProperty("bootstrap.servers", "0.0.0.0:9092");
创建Kafka消费者
// Kafka source for the "test" topic; SimpleStringSchema turns each record into a String.
// Connection settings come from the `properties` object built beforehand.
FlinkKafkaConsumer<String> consumer = new FlinkKafkaConsumer<>("test", new SimpleStringSchema(), properties);
添加数据源
// Register the Kafka consumer as the source of the streaming job;
// `data` is the resulting stream of String records.
DataStreamSource<String> data = env.addSource(consumer);
FlatMap
// Split every incoming line on single spaces and emit one (word, 1) pair per word.
SingleOutputStreamOperator<Tuple2<String, Long>> wordAndOne = data.flatMap(new FlatMapFunction<String, Tuple2<String, Long>>() {
    @Override
    public void flatMap(String line, Collector<Tuple2<String, Long>> out) throws Exception {
        for (String word : line.split(" ")) {
            // An empty line, or leading/repeated spaces, make split(" ") return
            // empty strings; those are not words and must not be counted.
            if (word.isEmpty()) {
                continue;
            }
            out.collect(Tuple2.of(word, 1L));
        }
    }
});
计算求和
// Group the (word, 1) pairs by word (tuple field f0) and keep a running sum of
// tuple field 1, i.e. the count.
SingleOutputStreamOperator<Tuple2<String, Long>> result = wordAndOne
        // The key is always the String word, so the selector declares String
        // rather than Object; this keeps the key type concrete for Flink.
        .keyBy(new KeySelector<Tuple2<String, Long>, String>() {
            @Override
            public String getKey(Tuple2<String, Long> value) throws Exception {
                return value.f0;
            }
        })
        .sum(1);
全部代码
package icu.wzk.demo04; import org.apache.flink.api.common.functions.FlatMapFunction; import org.apache.flink.api.common.serialization.SimpleStringSchema; import org.apache.flink.api.java.functions.KeySelector; import org.apache.flink.api.java.tuple.Tuple2; import org.apache.flink.streaming.api.datastream.DataStreamSource; import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer; import org.apache.flink.util.Collector; import java.util.Properties; public class StartApp { public static void main(String[] args) throws Exception { StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); Properties properties = new Properties(); properties.setProperty("bootstrap.servers","0.0.0.0:9092"); FlinkKafkaConsumer<String> consumer = new FlinkKafkaConsumer<>("test", new SimpleStringSchema(), properties); DataStreamSource<String> data = env.addSource(consumer); SingleOutputStreamOperator<Tuple2<String, Long>> wordAndOne = data.flatMap(new FlatMapFunction<String, Tuple2<String, Long>>() { public void flatMap(String s, Collector<Tuple2<String, Long>> collector) throws Exception { for (String word : s.split(" ")) { collector.collect(Tuple2.of(word, 1L)); } } }); SingleOutputStreamOperator<Tuple2<String, Long>> result = wordAndOne .keyBy(new KeySelector<Tuple2<String, Long>, Object>() { @Override public Object getKey(Tuple2<String, Long> value) throws Exception { return value.f0; } }) .sum(1); result.print(); env.execute(); } }
KafkaProducer
package icu.wzk.demo04;

import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.Producer;
import org.apache.kafka.clients.producer.ProducerRecord;

import java.util.Properties;

/**
 * Test data generator: sends 500 String key/value records to the Kafka topic
 * {@code test}, one every 200 ms.
 */
public class TestKafkaProducer {

    /**
     * Sends the test records and then closes the producer.
     *
     * @param args command-line arguments (unused)
     * @throws InterruptedException if the thread is interrupted while pausing between sends
     */
    public static void main(String[] args) throws InterruptedException {
        Properties props = new Properties();
        props.put("bootstrap.servers", "0.0.0.0:9092");
        props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");

        // try-with-resources closes the producer even when the loop is left early
        // (for example by an InterruptedException from Thread.sleep).
        try (Producer<String, String> producer = new KafkaProducer<>(props)) {
            for (int i = 0; i < 500; i++) {
                String key = "key-" + i;
                String value = "value-" + i;
                ProducerRecord<String, String> record = new ProducerRecord<>("test", key, value);
                // send() is asynchronous; without a callback a failed delivery
                // would go unnoticed, so report it on stderr.
                producer.send(record, (metadata, exception) -> {
                    if (exception != null) {
                        System.err.println("send failed: " + key + ", cause: " + exception);
                    }
                });
                System.out.println("send: " + key);
                Thread.sleep(200);
            }
        }
    }
}