为什么kafka开启事务后生产出来的位移offset始终间隔为2,关闭事务后就正常

雪之源 发表于: 2020-06-12   最后更新时间: 2020-06-12 20:07:42   1,675 浏览

1、kafka版本为kafka_2.11-2.2.1
2、问题是开启事务和关闭事务时生产的offset间隔值不相同。

代码如下

/*
 * Copyright 2018-2019 the original author or authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      https://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.example;

import com.common.Foo2;
import org.apache.kafka.clients.admin.NewTopic;
import org.apache.kafka.clients.producer.ProducerConfig;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.boot.ApplicationRunner;
import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;
import org.springframework.context.annotation.Bean;
import org.springframework.core.task.SimpleAsyncTaskExecutor;
import org.springframework.core.task.TaskExecutor;
import org.springframework.kafka.annotation.KafkaListener;
import org.springframework.kafka.core.DefaultKafkaProducerFactory;
import org.springframework.kafka.core.KafkaTemplate;
import org.springframework.kafka.core.ProducerFactory;
import org.springframework.kafka.support.converter.RecordMessageConverter;
import org.springframework.kafka.support.converter.StringJsonMessageConverter;

import java.util.HashMap;
import java.util.Map;

/**
 * Spring Boot sample that publishes to Kafka through a transactional producer factory
 * and consumes from {@code topic1} and {@code wxy_test}.
 *
 * <p>NOTE(review): with transactions enabled, Kafka's transactional protocol (KIP-98)
 * appends a commit/abort control record to every partition written in a transaction,
 * and that control record occupies an offset of its own. A transaction carrying a single
 * message would therefore advance the partition offset by 2 — confirm against the Kafka
 * documentation before treating an offset gap as message loss.
 *
 * @author Gary Russell
 * @since 2.2.1
 *
 */
@SpringBootApplication
public class Application {
    private static final Logger logger = LoggerFactory.getLogger(Application.class);

    private final TaskExecutor exec = new SimpleAsyncTaskExecutor();

    /**
     * Starts the application context and closes it as soon as startup (including the
     * blocking {@link #runner()}) has returned.
     *
     * @param args command-line arguments passed through to Spring Boot
     */
    public static void main(String[] args) {
        SpringApplication.run(Application.class, args).close();
    }

    /**
     * Builds the raw Kafka producer properties used by {@link #producerFactory()}.
     *
     * @return a mutable map of producer configuration entries
     */
    @Bean
    public Map<String,Object> producerConfig() {
        // Same fully-qualified class name the original assembled from two string fragments.
        String jsonSerializer = "org.springframework.kafka.support.serializer.JsonSerializer";

        Map<String, Object> props = new HashMap<>();
        props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "10.60.153.252:9092,10.60.153.253:9093");
        props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, jsonSerializer);
        props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, jsonSerializer);
        props.put(ProducerConfig.PARTITIONER_CLASS_CONFIG, "com.MyPartitioner");
        props.put(ProducerConfig.ACKS_CONFIG, "all");
        props.put(ProducerConfig.RETRIES_CONFIG, 1);
        props.put(ProducerConfig.LINGER_MS_CONFIG, 10);
        props.put(ProducerConfig.BUFFER_MEMORY_CONFIG, 33554432); // 32 MiB
        // NOTE(review): 524880 is not a power of two (512 KiB would be 524288) — confirm intended.
        props.put(ProducerConfig.BATCH_SIZE_CONFIG, 524880);
        props.put(ProducerConfig.COMPRESSION_TYPE_CONFIG, "gzip");
        return props;
    }


    /**
     * Creates the producer factory with a transaction id prefix, which is what switches
     * the producers it creates into transactional mode.
     *
     * @return the transactional producer factory
     */
    @Bean
    public ProducerFactory<Object,Object> producerFactory() {
        DefaultKafkaProducerFactory<Object,Object> producerFactory =
                new DefaultKafkaProducerFactory<>(producerConfig());
        // The former transactionCapable() call was dropped: it only reports a boolean and its
        // result was discarded. Setting the prefix is the call that enables transactions.
        producerFactory.setTransactionIdPrefix("trans-");
        return producerFactory;
    }

    /*public ProducerFactory<Object, Object> producerFactory() {
        return new DefaultKafkaProducerFactory<>(producerConfig());
    }*/


    /**
     * Creates the template used to send records.
     *
     * @param producerFactory the factory that supplies producers to the template
     * @return a template backed by {@code producerFactory}
     */
    @Bean
    public KafkaTemplate<Object, Object> kafkaTemplate(ProducerFactory<Object, Object> producerFactory) {
        return new KafkaTemplate<>(producerFactory);
    }

    /**
     * Registers a JSON message converter for listener payloads.
     *
     * @return the converter bean
     */
    @Bean
    public RecordMessageConverter converter() {
        return new StringJsonMessageConverter();
    }

    /**
     * Consumes {@code topic1}; rejects any payload whose {@code foo} value starts with "fail".
     *
     * @param foo the converted record payload
     * @throws RuntimeException when {@code foo.getFoo()} starts with "fail"
     */
    @KafkaListener(id = "wxyGroup", topics = "topic1")
    public void listen(Foo2 foo) {
        logger.info("Received: {}", foo);
        if (foo.getFoo().startsWith("fail")) {
            throw new RuntimeException("failed");
        }
        this.exec.execute(() -> System.out.println("Hit Enter to terminate..."));
    }

    /**
     * Consumes {@code wxy_test} and logs each payload.
     *
     * @param in the record payload as a string
     */
    @KafkaListener(id = "wxyGroup2", topics = "wxy_test")
    public void dltListen(String in) {
        logger.info("Received from wxy_test: {}", in);
        this.exec.execute(() -> System.out.println("Hit Enter to terminate..."));
    }

    /** Declares {@code topic1} with 1 partition and replication factor 1. */
    @Bean
    public NewTopic topic() {
        return new NewTopic("topic1", 1, (short) 1);
    }

    /** Declares {@code topic1.DLT} with 1 partition and replication factor 1. */
    @Bean
    public NewTopic dlt() {
        return new NewTopic("topic1.DLT", 1, (short) 1);
    }

    /** Declares {@code wxy_test} with 3 partitions and replication factor 1. */
    @Bean
    public NewTopic newTopic() {
        return new NewTopic("wxy_test", 3, (short) 1);
    }

    /**
     * Blocks startup until a byte can be read from standard input, keeping the
     * application alive until the user presses Enter.
     *
     * @return the runner bean
     */
    @Bean
    public ApplicationRunner runner() {
        return args -> {
            System.out.println("Hit Enter to terminate...");
            System.in.read();
        };
    }

}

此时生产的offset结果是

Kafka Message of topic:wxy_test/size:6/partition:1/offset:231 sends successfully at time:20200612 19:35:50-318! 
Kafka Message of topic:wxy_test/size:6/partition:0/offset:133 sends successfully at time:20200612 19:35:53-140! 
Received from wxy_test: aaaa
Kafka Message of topic:wxy_test/size:6/partition:0/offset:135 sends successfully at time:20200612 19:35:54-365! 
Received from wxy_test: aaaa
Kafka Message of topic:wxy_test/size:6/partition:2/offset:136 sends successfully at time:20200612 19:35:55-667! 
Received from wxy_test: aaaa
Kafka Message of topic:wxy_test/size:6/partition:1/offset:233 sends successfully at time:20200612 19:43:53-018! 
Received from wxy_test: aaaa
Kafka Message of topic:wxy_test/size:6/partition:0/offset:137 sends successfully at time:20200612 19:43:56-295! 
Received from wxy_test: aaaa
Kafka Message of topic:wxy_test/size:6/partition:0/offset:139 sends successfully at time:20200612 19:43:57-380! 
Received from wxy_test: aaaa

当我把上述的producerFactory()方法换成被星号注释掉的方法时(代码如下)

/*
 * Copyright 2018-2019 the original author or authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      https://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.example;

import com.common.Foo2;
import org.apache.kafka.clients.admin.NewTopic;
import org.apache.kafka.clients.producer.ProducerConfig;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.boot.ApplicationRunner;
import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;
import org.springframework.context.annotation.Bean;
import org.springframework.core.task.SimpleAsyncTaskExecutor;
import org.springframework.core.task.TaskExecutor;
import org.springframework.kafka.annotation.KafkaListener;
import org.springframework.kafka.core.DefaultKafkaProducerFactory;
import org.springframework.kafka.core.KafkaTemplate;
import org.springframework.kafka.core.ProducerFactory;
import org.springframework.kafka.support.converter.RecordMessageConverter;
import org.springframework.kafka.support.converter.StringJsonMessageConverter;

import java.util.HashMap;
import java.util.Map;

/**
 * Spring Boot sample that publishes to Kafka through a non-transactional producer factory
 * and consumes from {@code topic1} and {@code wxy_test}.
 *
 * @author Gary Russell
 * @since 2.2.1
 *
 */
@SpringBootApplication
public class Application {
    private static final Logger logger = LoggerFactory.getLogger(Application.class);

    private final TaskExecutor exec = new SimpleAsyncTaskExecutor();

    /**
     * Starts the application context and closes it as soon as startup (including the
     * blocking {@link #runner()}) has returned.
     *
     * @param args command-line arguments passed through to Spring Boot
     */
    public static void main(String[] args) {
        SpringApplication.run(Application.class, args).close();
    }

    /**
     * Builds the raw Kafka producer properties used by {@link #producerFactory()}.
     *
     * @return a mutable map of producer configuration entries
     */
    @Bean
    public Map<String,Object> producerConfig() {
        // Same fully-qualified class name the original assembled from two string fragments.
        String jsonSerializer = "org.springframework.kafka.support.serializer.JsonSerializer";

        Map<String, Object> props = new HashMap<>();
        props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "10.60.153.252:9092,10.60.153.253:9093");
        props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, jsonSerializer);
        props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, jsonSerializer);
        props.put(ProducerConfig.PARTITIONER_CLASS_CONFIG, "com.MyPartitioner");
        props.put(ProducerConfig.ACKS_CONFIG, "all");
        props.put(ProducerConfig.RETRIES_CONFIG, 1);
        props.put(ProducerConfig.LINGER_MS_CONFIG, 10);
        props.put(ProducerConfig.BUFFER_MEMORY_CONFIG, 33554432); // 32 MiB
        // NOTE(review): 524880 is not a power of two (512 KiB would be 524288) — confirm intended.
        props.put(ProducerConfig.BATCH_SIZE_CONFIG, 524880);
        props.put(ProducerConfig.COMPRESSION_TYPE_CONFIG, "gzip");
        return props;
    }


    /*@Bean
    public ProducerFactory<Object,Object> producerFactory() {
        DefaultKafkaProducerFactory<Object,Object> producerFactory =
                new DefaultKafkaProducerFactory<>(producerConfig());
        producerFactory.transactionCapable();
        producerFactory.setTransactionIdPrefix("trans-");
        return producerFactory;
    }*/

    /**
     * Creates a plain (non-transactional) producer factory from {@link #producerConfig()}.
     *
     * <p>The {@code @Bean} annotation is required here: without it this method is never
     * registered with the container, and {@link #kafkaTemplate(ProducerFactory)} would be
     * handed some other {@code ProducerFactory} that ignores {@link #producerConfig()}.
     *
     * @return the non-transactional producer factory
     */
    @Bean
    public ProducerFactory<Object, Object> producerFactory() {
        return new DefaultKafkaProducerFactory<>(producerConfig());
    }


    /**
     * Creates the template used to send records.
     *
     * @param producerFactory the factory that supplies producers to the template
     * @return a template backed by {@code producerFactory}
     */
    @Bean
    public KafkaTemplate<Object, Object> kafkaTemplate(ProducerFactory<Object, Object> producerFactory) {
        return new KafkaTemplate<>(producerFactory);
    }

    /**
     * Registers a JSON message converter for listener payloads.
     *
     * @return the converter bean
     */
    @Bean
    public RecordMessageConverter converter() {
        return new StringJsonMessageConverter();
    }

    /**
     * Consumes {@code topic1}; rejects any payload whose {@code foo} value starts with "fail".
     *
     * @param foo the converted record payload
     * @throws RuntimeException when {@code foo.getFoo()} starts with "fail"
     */
    @KafkaListener(id = "wxyGroup", topics = "topic1")
    public void listen(Foo2 foo) {
        logger.info("Received: {}", foo);
        if (foo.getFoo().startsWith("fail")) {
            throw new RuntimeException("failed");
        }
        this.exec.execute(() -> System.out.println("Hit Enter to terminate..."));
    }

    /**
     * Consumes {@code wxy_test} and logs each payload.
     *
     * @param in the record payload as a string
     */
    @KafkaListener(id = "wxyGroup2", topics = "wxy_test")
    public void dltListen(String in) {
        logger.info("Received from wxy_test: {}", in);
        this.exec.execute(() -> System.out.println("Hit Enter to terminate..."));
    }

    /** Declares {@code topic1} with 1 partition and replication factor 1. */
    @Bean
    public NewTopic topic() {
        return new NewTopic("topic1", 1, (short) 1);
    }

    /** Declares {@code topic1.DLT} with 1 partition and replication factor 1. */
    @Bean
    public NewTopic dlt() {
        return new NewTopic("topic1.DLT", 1, (short) 1);
    }

    /** Declares {@code wxy_test} with 3 partitions and replication factor 1. */
    @Bean
    public NewTopic newTopic() {
        return new NewTopic("wxy_test", 3, (short) 1);
    }

    /**
     * Blocks startup until a byte can be read from standard input, keeping the
     * application alive until the user presses Enter.
     *
     * @return the runner bean
     */
    @Bean
    public ApplicationRunner runner() {
        return args -> {
            System.out.println("Hit Enter to terminate...");
            System.in.read();
        };
    }

}

这时的log如下

2020-06-12 19:56:26.660  INFO 19108 --- [ad | producer-1] com.example.KafkaSendResultHandler       : Kafka Message of topic:wxy_test/size:4/partition:0/offset:144 sends successfully at time:20200612 19:56:26-641! 
2020-06-12 19:57:31.308  INFO 19108 --- [ad | producer-1] com.example.KafkaSendResultHandler       : Kafka Message of topic:wxy_test/size:4/partition:0/offset:145 sends successfully at time:20200612 19:57:31-306!

总之这时每个分区的offset都是连续的。

即开启事务时,offset间隔2,关闭事务后,offset间隔1.这个虽然对最终结果没有影响,但是就是百思不得其解啊。求大神看一下?第一次在orcHome提问题,希望能得到解答。

发表于 2020-06-12
添加评论

当你启用producerFactory.setTransactionIdPrefix("trans-");
在这种情况下,客户端会启用一个生产者缓存,用于确认kafka集群返回的信息。
我不太清楚springboot提供的kafka客户端是通过什么机制打印的。我想,如果kafka发送的消息先放在缓存中等待确认,那么kafka返回确认的时候,消息会从缓存中踢出(多笔一起踢出),客户端打印时,在同一时间点就跳过了已经确认过的offset。

你可以加大发送压力,看看offset间隔是否会扩大。

最后,kafka发送有同步发送和异步发送,理论上异步发送都是按批次发的,offset的间隔也应该更大才是。你还是得压测一下。

雪之源 -> 半兽人 4年前

我用jmeter进行了单分区的压力测试,Number of Threads->50、Ramp-up period(in seconds):2、loop count:100。所有的测试在单topic同一分区下进行,得到结果如下,

offset=16692
offset=16691
offset=16693
offset=16702
offset=16703
offset=16705
offset=16708
offset=16710
offset=16711
offset=16709
offset=16712
offset=16713
offset=16714
offset=16718
offset=16719
offset=16722
offset=16725
offset=16726
offset=16729
offset=16728
offset=16727
offset=16733
offset=16735
offset=16737
offset=16743
offset=16744
offset=16745
offset=16748
offset=16746
offset=16747
offset=16756
......

有的如你所说,间隔确实拉大了,但是有的却是连续的。
另外,当我用非springboot配置的事务去执行kafka时,可以保证offset是连续的。
代码如下:

public static void main(String[] args) {
    // Producer 配置信息,应该配置在属性文件中
    Properties props = new Properties();
    //指定要连接的 broker,不需要列出所有的 broker,但建议至少列出2个,以防某个 broker 挂了
    props.put("bootstrap.servers", "10.60.153.252:9092");
    props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
    props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");
    props.put("transactional.id", "test.transactional.id"); // 需要设置 transactional.id
    // 创建 Producer
    Producer<string, string=""> producer = new KafkaProducer<string, string="">(props);
    // 初始化事务
    producer.initTransactions();
    try {
        // 开启事务
        producer.beginTransaction();
        for (int i = 0; i &lt; 10; i++) { 
             // 发送消息 
             producer.send(new ProducerRecord("wxy_test", "message "+i), new Callback() {
             public void onCompletion(RecordMetadata metadata, Exception exception) {
                 if(exception != null) {
                     System.out.println("send message i failed with " + exception.getMessage());
                 } else {
                     // offset 是消息在 partition 中的编号,可以根据 offset 检索消息
                     System.out.println("message i sent to " + metadata.topic() + ", partition " + metadata.partition() + ", offset " + metadata.offset());
                 }
             }
         });
        }
        // 提交事务
        producer.commitTransaction();
    } catch(KafkaException e) {
        // 终止事务
        producer.abortTransaction();
    } finally {
        producer.close();
    }
}

我是觉得如果真是spring-kafka集成事务导致这个offset间隔2,那就是个bug啊。很容易让人觉得是消息传递不正常。

半兽人 -> 雪之源 4年前

spring的这个不太清楚了,不过事务会很影响性能。

你的答案

查看kafka相关的其他问题或提一个您自己的问题