Redis Advanced (bitmap bloom filter)

Table of Contents

1. What is a Bloom filter?

2. Bloom filter characteristics

3. Usage scenarios of Bloom filter

4. Code practice, unity of knowledge and action


1. What is Bloom filter

Bloom Filter is a probabilistic data structure with high space efficiency and fast processing speed, which is used to determine whether an element exists in a set.

A Bloom filter consists of a bit array (usually a large binary bit array) and a series of hash functions.

2. Bloom filter characteristics

Efficiently insert and query, reduce memory usage, do not save data information, only store a mark bit 0, 1. But the returned result is uncertain and slightly flawed.

Due to the existence of hash conflict, the judgment result of an element (returning 0 or 1) exists but the element does not necessarily exist. If the judgment result does not exist, it definitely does not exist (it may exist, if not, it definitely does not exist).

What is guaranteed is that if an element is not in the Bloom filter, this element must not exist.

3. Usage scenarios of Bloom filter

1. Solve the cache penetration problem and use it with Redis in conjunction with bitmap

2. Blacklist verification and spam identification

Binary array construction process

  1. Preload matching records
  2. Calculate the hash value of each record
  3. Calculate the bitmap array bits corresponding to the hash value
  4. Modify the value to 1

The process of finding elements

  1. Calculate the hash value of an element
  2. Calculate the biamap array bits corresponding to the hash value
  3. Find the value at the corresponding position in the array, 0 means it does not exist, 1 means it exists

4. Combining knowledge with action in code practice

BloomFilterUtil code

package com.toonyoo.redis.bloomFilter.util;


import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.data.redis.core.StringRedisTemplate;
import org.springframework.stereotype.Component;

import javax.annotation.PostConstruct;


@Component
@Slf4j
public class BloomFilterUtil {

    public static final String CUSTOMER_REDIS_KEY = "customer:";

    // redis data whitelist key
    public static final String WHITE_LIST = "whiteList";

    @Autowired
    private StringRedisTemplate redisTemplate;


    /**
     * Bloom filter initializes whitelist data
     */
    @PostConstruct
    public void init() {

        // Take a piece of data in the current database table as an example
        String key = CUSTOMER_REDIS_KEY + "1266688003";

        // 1. Calculate hash value through key
        int value = Math.abs(key.hashCode());
        // 2. Calculate the slot corresponding to the bitmap by taking the modulo of the hash value and 2^32
        long bitmap = (long) (value % Math.pow(2, 32));
        // 3. Store the slot corresponding to the key into the bitmap
        redisTemplate.opsForValue().setBit(WHITE_LIST,bitmap,true);
        log.info("Bloom filter initialized whitelist data successfully, {} corresponding bitmap slot: {}", key, bitmap);
    }

    /**
     *
     * @param keyItem The key of the bloom filter is the key stored in the bitmap in redis == whitelist key
     * @param key The key of the data, that is, the bitmap slot is calculated by the key
     * @return
     */
    public boolean checkBloomFilter(String keyItem, String key){

        // 1. Calculate hash value through key
        int value = Math.abs(key.hashCode());
        // 2. Calculate the slot corresponding to the bitmap by taking the modulo of the hash value and 2^32
        long bitmap = (long) (value % Math.pow(2, 32));
        log.info("Check the Redis data whitelist, the bitmap slot corresponding to {}: {}", key, bitmap);
        // 3. Find the slot corresponding to the key in the bitmap. If it exists in the bitmap, it may exist. If it does not, it must not exist.
        return redisTemplate.opsForValue().getBit(keyItem, bitmap);
    }



}

Business layer specific implementation logic

package com.toonyoo.redis.bloomFilter;

import com.alibaba.fastjson.JSON;
import com.baomidou.mybatisplus.extension.service.impl.ServiceImpl;
import com.toonyoo.redis.bloomFilter.util.BloomFilterUtil;
import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.data.redis.core.StringRedisTemplate;
import org.springframework.stereotype.Service;

import static com.toonyoo.redis.bloomFilter.util.BloomFilterUtil.WHITE_LIST;

@Service
@Slf4j
public class CustomerService extends ServiceImpl<CustomerMapper, Customer> {


    @Autowired
    private CustomerMapper customerMapper;

    @Autowired
    private StringRedisTemplate redisTemplate;

    @Autowired
    private BloomFilterUtil bloomFilterUtil;

    public static final String CUSTOMER_REDIS_KEY = "customer:";

    /**
     *Business scenario-simulate adding users and writing back to Redis
     *
     * @param customer
     */
    public void addCustomer(Customer customer) {

        if (customer != null) {
            //Write data to mysql
            int i = customerMapper.insert(customer);
            //If writing to mysql is successful
            if (i > 0) {
                // cache redis key
                String key = CUSTOMER_REDIS_KEY + customer.getId();
                // Retrieve the data from mysql
                Customer c = customerMapper.selectById(customer.getId());
                // Serialization
                String jsonString = JSON.toJSONString(c);
                //Write back into redis
                redisTemplate.opsForValue().set(key, jsonString);

                /**
                 * The key cannot be stored in the Bloom filter here, because the data in the cache will expire, but the bitmap will not expire.
                 * If the cache expires in large batches, it will first hit the Bloom filter, which is considered a whitelist, and then a large number of requests will hit Redis and the data in the magnet Redis has expired.
                 * Causes a large number of requests to mysql.
                 */

            }
        }

    }

    /**
     * Simulate business scenario-read redis data
     * @param id
     */
    public Customer getCustomerById(Integer id) {

        Customer customer = null;
        // cache redis key
        String key = CUSTOMER_REDIS_KEY + id;

        // First check whether the Bloom filter exists for this key. If not, return directly. This can prevent cache penetration.
        boolean flag = bloomFilterUtil.checkBloomFilter(WHITE_LIST, key);

        // If there is none, there must be none, access is denied, and returns directly
        if (!flag){
            log.info("This key does not exist in the Bloom filter, access is denied");
            return null;
        }

        //Check redis first
        String result = redisTemplate.opsForValue().get(key);
        //Deserialize
        customer = JSON.parseObject(result,Customer.class);
        //If redis is not available, check mysql
        if (result == null){
            customer = customerMapper.selectById(id);
            // If data exists in mysql, it needs to be written back to redis
            if (customer != null){
                String jsonString = JSON.toJSONString(customer);
                redisTemplate.opsForValue().set(key,jsonString);
            }
        }
        return customer;
    }
}