Restricting the set of characters recognized by OCR (two methods)

Both methods modify PaddleOCR's C++ inference code.

  • Code download address: https://gitee.com/paddlepaddle/PaddleOCR/tree/release/2.6/deploy/cpp_infer
  • model used
    https://gitee.com/paddlepaddle/PaddleOCR/blob/release/2.6/doc/doc_ch/models_list.md

    Download link: https://paddleocr.bj.bcebos.com/PP-OCRv3/english/en_PP-OCRv3_rec_infer.tar
  • dictionary used
    https://gitee.com/paddlepaddle/PaddleOCR/blob/release/2.6/ppocr/utils/en_dict.txt

The original decoding loop, which starts at line 106 of ocr_rec.cpp:

 // Greedy decoding of the recognizer output for batch item m.
 // predict_batch is indexed as [m][n][class] flattened row-major, with
 // predict_shape[1] time steps and predict_shape[2] classes per step.
 for (int n = 0; n < predict_shape[1]; n++) {
   // Scores of all classes at time step n.
   const auto *row_begin =
       &predict_batch[(m * predict_shape[1] + n) * predict_shape[2]];
   const auto *row_end = row_begin + predict_shape[2];

   // get idx: class with the highest score at this step
   argmax_idx = static_cast<int>(Utility::argmax(row_begin, row_end));
   // get score: the highest score itself
   max_value = static_cast<float>(*std::max_element(row_begin, row_end));

   // Index 0 is never emitted, and a class repeated from the previous step
   // is emitted only once.
   const bool repeats_previous = (n > 0) && (argmax_idx == last_index);
   if (argmax_idx > 0 && !repeats_previous) {
     score += max_value;
     count += 1;
     str_res += label_list_[argmax_idx];
   }
   last_index = argmax_idx;
 }
  1. Method one: keep the most probable character only if it lies in the selected range; otherwise drop it.
 for (int n = 0; n < predict_shape[1]; n ++ )
      {<!-- -->
        // get idx
        argmax_idx = int(Utility::argmax(
             & amp; predict_batch[(m * predict_shape[1] + n) * predict_shape[2]],
             & amp; predict_batch[(m * predict_shape[1] + n + 1) * predict_shape[2]]));
        // get score
        max_value = float(*std::max_element(
             & amp; predict_batch[(m * predict_shape[1] + n) * predict_shape[2]],
             & amp; predict_batch[(m * predict_shape[1] + n + 1) * predict_shape[2]]));

/original//
// /* For the dictionary en_dict.txt, 0~9 numbers, 10~16, 43~48, 75~93 symbols, 17~42 uppercase letters, 49~74 lowercase letters, */
// /*dictionary print*/
// // for(int mm=0;mm<100;mm++)
// // {<!-- -->
// // std::cout<<"Number: "<<mm<<"Label: "<<label_list_[mm]<<std::endl;
// // }
// /* only recognize numbers */
// //if(argmax_idx<11 || (argmax_idx==96))
// /*Only recognize lowercase letters*/
// //if(argmax_idx==0 || (argmax_idx<76 & amp; & amp; argmax_idx>49) || (argmax_idx==96))
// /* Only uppercase letters are recognized */
// //if(argmax_idx==0 || ((argmax_idx>17) & amp; & amp; (argmax_idx<44)) || (argmax_idx==96))
// /*Recognize uppercase letters and numbers*/
// // if(argmax_idx==0 || ((argmax_idx>17) & amp; & amp; (argmax_idx<44)) || (argmax_idx<11) || (argmax_idx==96))
// // {<!-- -->
// // std::cout<<"-------------------1---------------"<< std::endl;
// // std::cout<<" argmax_idx "<<argmax_idx<<" label: "<<label_list_[argmax_idx]<<std::endl;
// if (argmax_idx > 0 & amp; & amp; (!(n > 0 & amp; & amp; argmax_idx == last_index)))
// {<!-- -->
// // std::cout<<"-------------------2---------------"<< std::endl;
// // std::cout<<" argmax_idx "<<argmax_idx<<" label: "<<label_list_[argmax_idx]<<std::endl;
// score += max_value;
//count += 1;
// str_res + = label_list_[argmax_idx];
// }
// // }
/original//
        bool Number = true;//Number
        bool Mark = true;//punctuation
        bool letter = true;//letter
        //Check all punctuation numbers and letters
        if (Number & amp; & amp; Mark & amp; & amp; letter)
        {<!-- -->
                  if (argmax_idx > 0 & amp; & amp; (!(n > 0 & amp; & amp; argmax_idx == last_index)))
                  {<!-- -->
                    //std::cout<<" argmax_idx "<<argmax_idx<<" label: "<<label_list_[argmax_idx]<<std::endl;
                    score += max_value;
                    count += 1;
                    str_res + = label_list_[argmax_idx];
                  }
        }
        else if(!Number & amp; & amp; !Mark & amp; & amp; !letter)//Uncheck all punctuation numbers and letters
        {<!-- -->
          continue;
        }
        else if(Number & amp; & amp; !Mark & amp; & amp; !letter)//only numbers
        {<!-- -->
          if(argmax_idx<11 || (argmax_idx==96))
              {<!-- -->
                  if (argmax_idx > 0 & amp; & amp; (!(n > 0 & amp; & amp; argmax_idx == last_index)))
                  {<!-- -->
                    //std::cout<<" argmax_idx "<<argmax_idx<<" label: "<<label_list_[argmax_idx]<<std::endl;
                    score += max_value;
                    count += 1;
                    str_res + = label_list_[argmax_idx];
                  }
              }
        }
        else if(!Number & amp; & amp; !Mark & amp; & amp; letter)//only letters
        {<!-- -->
              if((argmax_idx<76 & amp; & amp; argmax_idx>49) || ((argmax_idx>17) & amp; & amp; (argmax_idx<44)) || (argmax_idx==96))
              //if(argmax_idx==0 || (argmax_idx<76 & amp; & amp; argmax_idx>49) || ((argmax_idx>17) & amp; & amp; (argmax_idx<44)) || (argmax_idx== 96))
              {<!-- -->
                  if (argmax_idx > 0 & amp; & amp; (!(n > 0 & amp; & amp; argmax_idx == last_index)))
                  {<!-- -->
                    //std::cout<<" argmax_idx "<<argmax_idx<<" label: "<<label_list_[argmax_idx]<<std::endl;
                    score += max_value;
                    count += 1;
                    str_res + = label_list_[argmax_idx];
                  }
              }
        }
        else if(!Number & amp; & amp; Mark & amp; & amp; !letter)//only punctuation
        {<!-- -->
            if((argmax_idx>=11 & amp; & amp; argmax_idx<=17) || ((argmax_idx>=44) & amp; & amp; (argmax_idx<=49)) || ((argmax_idx>=76) & amp; & amp; (argmax_idx<=94)) || (argmax_idx==96) )
            // if(argmax_idx==0 || (argmax_idx>=11 & amp; & amp; argmax_idx<=17) || ((argmax_idx>=44) & amp; & amp; (argmax_idx<=49)) || ((argmax_idx>=76) & amp; & amp; (argmax_idx<=94)) || (argmax_idx==96) )
              {<!-- -->
                  if (argmax_idx > 0 & amp; & amp; (!(n > 0 & amp; & amp; argmax_idx == last_index)))
                  {<!-- -->
                    //std::cout<<" argmax_idx "<<argmax_idx<<" label: "<<label_list_[argmax_idx]<<std::endl;
                    score += max_value;
                    count += 1;
                    str_res + = label_list_[argmax_idx];
                  }
              }
        }
        else if(Number & amp; & amp; Mark & amp; & amp; !letter)//Numbers and punctuation
        {<!-- -->
          if(argmax_idx<=17 || ((argmax_idx>=44) & amp; & amp; (argmax_idx<=49)) || ((argmax_idx>=76) & amp; & amp; (argmax_idx<=94)) || (argmax_idx==96) )
              {<!-- -->
                  if (argmax_idx > 0 & amp; & amp; (!(n > 0 & amp; & amp; argmax_idx == last_index)))
                  {<!-- -->
                    //std::cout<<" argmax_idx "<<argmax_idx<<" label: "<<label_list_[argmax_idx]<<std::endl;
                    score += max_value;
                    count += 1;
                    str_res + = label_list_[argmax_idx];
                  }
              }
        }
        else if(Number & amp; & amp; !Mark & amp; & amp; letter)//numbers and letters
        {<!-- -->
              if(argmax_idx<11 || (argmax_idx<76 & amp; & amp; argmax_idx>49) || ((argmax_idx>17) & amp; & amp; (argmax_idx<44)) || (argmax_idx==96))
                {<!-- -->
                  if (argmax_idx > 0 & amp; & amp; (!(n > 0 & amp; & amp; argmax_idx == last_index)))
                  {<!-- -->
                    //std::cout<<" argmax_idx "<<argmax_idx<<" label: "<<label_list_[argmax_idx]<<std::endl;
                    score += max_value;
                    count += 1;
                    str_res + = label_list_[argmax_idx];
                  }
                }
        }
        else if(!Number & amp; & amp; Mark & amp; & amp; letter)//letters and punctuation
        {<!-- -->
          if((argmax_idx>=11 & amp; & amp; argmax_idx<=94) || (argmax_idx==96) )
          // if(argmax_idx==0 || (argmax_idx>=11 & amp; & amp; argmax_idx<=94) || (argmax_idx==96) )
              {<!-- -->
                  if (argmax_idx > 0 & amp; & amp; (!(n > 0 & amp; & amp; argmax_idx == last_index)))
                  {<!-- -->
                    //std::cout<<" argmax_idx "<<argmax_idx<<" label: "<<label_list_[argmax_idx]<<std::endl;
                    score += max_value;
                    count += 1;
                    str_res + = label_list_[argmax_idx];
                  }
              }
        }

         last_index = argmax_idx;
      }
  2. Method two
Define the allowed character range. If the most probable character is not in that range, fall back to the second most probable one, and so on, until a character inside the range is found.
          bool Number = true;//Number
          bool Mark = false;//punctuation
          bool letter = true;//letter
  for (int n = 0; n < predict_shape[1]; n ++ ) {<!-- -->
    std::vector<float> datas(predict_shape[2]);
    memcpy( &datas[0], &predict_batch[n * predict_shape[2]], sizeof(float)*predict_shape[2]);
    std::vector<int> idx_list = Argsort(datas);
    // std::cout<<"max: "<<argmax_idx<<std::endl;
    // std::cout<<idx_list.size()<<"------------"<<idx_list[idx_list.size()-1]<<std::endl;
    for(int j=idx_list. size()-1;;j--)
    {<!-- -->
  int idx=idx_list[j];
  // /* For the dictionary en_dict.txt, 0~9 numbers, 10~16, 43~48, 75~93 symbols, 17~42 uppercase letters, 49~74 lowercase letters, */
          //Check all punctuation numbers and letters
          if (Number & amp; & amp; Mark & amp; & amp; letter)
          {<!-- -->
              argmax_idx = idx;
              break;
          }
          else if(!Number & amp; & amp; !Mark & amp; & amp; !letter)//Uncheck all punctuation numbers and letters
          {<!-- -->
            argmax_idx = 96;
            break;
          }
          else if(Number & amp; & amp; !Mark & amp; & amp; !letter)//only numbers
          {<!-- -->
            if(idx<11 || (idx==96))
                {<!-- -->
                  argmax_idx = idx;
                  break;
                }
             else{<!-- -->
                  continue;
             }
          }
          else if(!Number & amp; & amp; !Mark & amp; & amp; letter)//only letters
          {<!-- -->
            if((idx<76 & amp; & amp; idx>49) || ((idx>17) & amp; & amp; (idx<44)) || (idx==96))
                //if(argmax_idx==0 || (argmax_idx<76 & amp; & amp; argmax_idx>49) || ((argmax_idx>17) & amp; & amp; (argmax_idx<44)) || (argmax_idx== 96))
                {<!-- -->
                  argmax_idx = idx;
                  break;
                }
            else{<!-- -->
                  continue;
                }
          }
          else if(!Number & amp; & amp; Mark & amp; & amp; !letter)//only punctuation
          {<!-- -->
              if((idx>=11 & amp; & amp; idx<=17) || ((idx>=44) & amp; & amp; (idx<=49)) || ((idx>=76) & amp; & amp; (idx<=94)) || (idx==96) )
              // if(argmax_idx==0 || (argmax_idx>=11 & amp; & amp; argmax_idx<=17) || ((argmax_idx>=44) & amp; & amp; (argmax_idx<=49)) || ((argmax_idx>=76) & amp; & amp; (argmax_idx<=94)) || (argmax_idx==96) )
                {<!-- -->
                  argmax_idx = idx;
                  break;
                }
             else{<!-- -->
                  continue;
                }
          }
          else if(Number & amp; & amp; Mark & amp; & amp; !letter)//Numbers and punctuation
          {<!-- -->
            if(idx<=17 || ((idx>=44) & amp; & amp; (idx<=49)) || ((idx>=76) & amp; & amp; (idx<=94)) || (idx==96) )
                {<!-- -->
                  argmax_idx = idx;
                  break;
                }
             else{<!-- -->
                  continue;
                }
          }
          else if(Number & amp; & amp; !Mark & amp; & amp; letter)//numbers and letters
          {<!-- -->
                      // std::cout<<"--------------------"<<std::endl;
                if(idx<11 || (idx<76 & amp; & amp; idx>49) || ((idx>17) & amp; & amp; (idx<44)) || (idx==96))
                  {<!-- -->
                  argmax_idx = idx;
                  break;
                }
             else{<!-- -->
                  continue;
                  }
          }
          else if(!Number & amp; & amp; Mark & amp; & amp; letter)//letters and punctuation
          {<!-- -->
            if((idx>=11 & amp; & amp; idx<=94) || (idx==96) )
            // if(argmax_idx==0 || (argmax_idx>=11 & amp; & amp; argmax_idx<=94) || (argmax_idx==96) )
                {<!-- -->
                  argmax_idx = idx;
                  break;
                }
             else{<!-- -->
                  continue;
                }
          }
        }

    
    // argmax_idx = int(Argmax( & amp; predict_batch[n * predict_shape[2]],
    // & amp; predict_batch[(n + 1) * predict_shape[2]]));
    max_value =
        float(*std::max_element( & amp; predict_batch[n * predict_shape[2]],
                                 & amp; predict_batch[(n + 1) * predict_shape[2]]));
    // max_value = predict_batch[argmax_idx];
    if (argmax_idx > 0 & amp; & amp; (!(n > 0 & amp; & amp; argmax_idx == last_index))) {<!-- -->
      score += max_value;
      count += 1;
      str_res += character_dict[argmax_idx];
    }
    last_index = argmax_idx;
  }