Rewriting empyrical with Arrow (C++), part 1 — using Arrow to read fund net value data and calculate the Sharpe ratio

I used the C++ version of Arrow to read fund net value data from a CSV file and then calculated the Sharpe ratio. Embarrassingly, the Arrow C++ version takes longer to run than the Python empyrical version...

I am new to Arrow, and this is the first time I have implemented this function myself. It is most likely not the most efficient implementation, but it is my first step toward rewriting backtrader with Arrow C++.

  • Rewrite empyrical with Arrow as a practice exercise; the goal is to end up with two files: empyrical.h and empyrical.cpp
  • Rewrite pyfolio with Arrow and Qt to get a better-looking interface, packaged as two files: pyfolio.h and pyfolio.cpp
  • Rewrite backtrader

The C++ source file:

my_example.cc

#include <arrow/api.h>
#include <arrow/io/api.h>
#include "arrow/csv/api.h"
#include <arrow/compute/api.h>
#include <iostream>
#include <chrono>
//#include "../empyrical/empyrical.h"

// Reads the fund net-value CSV, derives period-over-period simple returns from
// the "Reweighted Net Value" column, and prints the annualized Sharpe ratio
// (mean / sample stddev * sqrt(252)) followed by the elapsed wall-clock time.
//
// Returns arrow::Status::OK() on success. Otherwise returns the first failing
// Arrow status (file open, CSV read, compute kernel), KeyError when the column
// is absent, or Invalid when the series is too short or the result is null.
arrow::Status RunMain() {
    // steady_clock is monotonic, which is what an elapsed-time measurement needs.
    const auto start_time = std::chrono::steady_clock::now();

    // Open the CSV file on disk as a readable Arrow file object.
    std::shared_ptr<arrow::io::ReadableFile> infile;
    ARROW_ASSIGN_OR_RAISE(infile, arrow::io::ReadableFile::Open("/home/yun/Documents/fund_nav.csv"));

    // Build a CSV table reader using the default read/parse/convert options.
    std::shared_ptr<arrow::Table> csv_table;
    ARROW_ASSIGN_OR_RAISE(
        auto csv_reader,
        arrow::csv::TableReader::Make(
            arrow::io::default_io_context(), infile, arrow::csv::ReadOptions::Defaults(),
            arrow::csv::ParseOptions::Defaults(), arrow::csv::ConvertOptions::Defaults()));
    // Read the whole file into an in-memory table.
    ARROW_ASSIGN_OR_RAISE(csv_table, csv_reader->Read());

    // Output the metadata information of the Table
    // std::cout << "Table Metadata:" << std::endl;
    // std::cout << "Number of columns: " << csv_table->num_columns() << std::endl;
    // std::cout << "Number of rows: " << csv_table->num_rows() << std::endl;
    // std::cout << "Schema: " << csv_table->schema()->ToString() << std::endl;

    // Output the data displayed in the Table
    // for (int i = 0; i < csv_table->num_columns(); ++i) {
    //     std::shared_ptr<arrow::ChunkedArray> column = csv_table->column(i);
    //     std::cout << "Column " << i << ": " << column->ToString() << std::endl;
    // }

    // 1. Method to display table information to std::cout
    // std::shared_ptr<arrow::RecordBatch> record_batch;
    // arrow::Result<std::shared_ptr<arrow::RecordBatch>> result = csv_table->CombineChunksToBatch();
    // if (result.ok()) {
    //     record_batch = result.ValueOrDie();
    //     // Use record_batch here
    // } else {
    //     // Handle errors
    //     std::cerr << "Error: " << result.status().ToString() << std::endl;
    // }
    // arrow::Status status = arrow::PrettyPrint(*record_batch, 2, &std::cout);
    // if (!status.ok()) {
    //     // Handle errors, such as printing error messages
    //     std::cerr << "Error: " << status.ToString() << std::endl;
    // }
    // 2. Method to display table information to std::cout
    // std::cout << csv_table->ToString() << std::endl;
    // 3. Method to display table information to std::cout
    // arrow::Status status = arrow::PrettyPrint(*csv_table, 2, &std::cout);
    // if (!status.ok()) {
    //     // Handle errors, such as printing error messages
    //     std::cerr << "Error: " << status.ToString() << std::endl;
    // }

    // Start calculating the Sharpe ratio
    // std::cout << "There are trading days in a year" << AnnualizationFactors::DAILY << "days" << std::endl;
    // std::cout << DAILY << std::endl;

    // GetColumnByName yields a null pointer when the column does not exist, so
    // fail with a readable error instead of dereferencing it.
    std::shared_ptr<arrow::ChunkedArray> cum_nav = csv_table->GetColumnByName("Reweighted Net Value");
    if (cum_nav == nullptr) {
        return arrow::Status::KeyError("Column 'Reweighted Net Value' not found; table schema:\n",
                                       csv_table->schema()->ToString());
    }
    // Three net values give two returns, the minimum for a sample standard
    // deviation (ddof = 1). This also keeps the slice lengths below non-negative.
    if (cum_nav->length() < 3) {
        return arrow::Status::Invalid("Need at least 3 net value rows to compute a Sharpe ratio, got ",
                                      cum_nav->length());
    }

    // Simple returns: (nav[t] - nav[t-1]) / nav[t-1], built from two views of
    // the same column offset by one row.
    const int64_t return_count = cum_nav->length() - 1;
    std::shared_ptr<arrow::ChunkedArray> now_cum_nav = cum_nav->Slice(1, return_count);
    std::shared_ptr<arrow::ChunkedArray> pre_cum_nav = cum_nav->Slice(0, return_count);
    ARROW_ASSIGN_OR_RAISE(arrow::Datum fund_diff,
                          arrow::compute::CallFunction("subtract", {now_cum_nav, pre_cum_nav}));
    ARROW_ASSIGN_OR_RAISE(arrow::Datum fund_returns,
                          arrow::compute::CallFunction("divide", {fund_diff, pre_cum_nav}));
    // Debug helpers for inspecting the returns:
    // std::cout << "Datum kind: " << fund_returns.ToString()
    //           << " content type: " << fund_returns.type()->ToString() << std::endl;
    // std::cout << fund_returns.chunked_array()->ToString() << std::endl;

    // Annualization factor: sqrt(252), with 252 periods per year.
    const double days_of_year_double = 252.0;
    std::shared_ptr<arrow::Scalar> days_of_year = arrow::MakeScalar(days_of_year_double);
    ARROW_ASSIGN_OR_RAISE(arrow::Datum sqrt_year,
                          arrow::compute::CallFunction("sqrt", {days_of_year}));

    // Mean and sample standard deviation (ddof = 1) of the returns.
    ARROW_ASSIGN_OR_RAISE(arrow::Datum avg_return,
                          arrow::compute::CallFunction("mean", {fund_returns}));
    arrow::compute::VarianceOptions variance_options;
    variance_options.ddof = 1;
    ARROW_ASSIGN_OR_RAISE(arrow::Datum avg_std,
                          arrow::compute::CallFunction("stddev", {fund_returns}, &variance_options));

    // Per-period Sharpe ratio scaled to an annual figure.
    ARROW_ASSIGN_OR_RAISE(arrow::Datum daily_sharpe_ratio,
                          arrow::compute::CallFunction("divide", {avg_return, avg_std}));
    ARROW_ASSIGN_OR_RAISE(arrow::Datum sharpe_ratio,
                          arrow::compute::CallFunction("multiply", {daily_sharpe_ratio, sqrt_year}));

    // A null scalar (e.g. too few non-null returns) carries no usable value.
    if (!sharpe_ratio.scalar()->is_valid) {
        return arrow::Status::Invalid("Sharpe ratio is null; check the net value column for missing data");
    }

    std::cout << "The calculated Sharpe ratio: " << sharpe_ratio.scalar_as<arrow::DoubleScalar>().value << '\n';

    const auto end_time = std::chrono::steady_clock::now();
    const auto duration = std::chrono::duration_cast<std::chrono::microseconds>(end_time - start_time);

    std::cout << "c++ reads the data and then calculates the Sharpe rate. The total time spent is: " << duration.count()/1000.0 << " ms" << std::endl;

    return arrow::Status::OK();
}

// (Documentation part: main function)
int main() {<!-- -->
  arrow::Status st = RunMain();
  if (!st.ok()) {<!-- -->
    std::cerr << st << std::endl;
    return 1;
  }
  
  return 0;
}

CMakeLists.txt

# Minimum CMake version needed to configure this project.
cmake_minimum_required(VERSION 3.16)

project(MyExample)

# Locate the installed Arrow packages; configuration fails if any is missing.
# NOTE(review): my_example.cc only includes the Arrow api/io/csv/compute
# headers, so Parquet and ArrowDataset look unnecessary here — confirm before
# removing them.
find_package(Arrow REQUIRED)
find_package(Parquet REQUIRED)
find_package(ArrowDataset REQUIRED)

# Build the my_example executable from my_example.cc and link it against the
# shared Arrow, Parquet and ArrowDataset libraries.
add_executable(my_example my_example.cc)
target_link_libraries(my_example PRIVATE Arrow::arrow_shared Parquet::parquet_shared ArrowDataset::arrow_dataset_shared)

In the folder containing both files, run:

# Configure the project into the ./build directory.
cmake -B build
# Compile the my_example target.
cmake --build build
# Run the resulting executable.
./build/my_example

The equivalent Python code is as follows:

import time

import empyrical as ep
import pandas as pd

# Time reading the CSV plus the Sharpe ratio calculation, so the figure is
# comparable with the elapsed time printed by the C++ program.
start = time.perf_counter()
data = pd.read_csv("/home/yun/Documents/fund_nav.csv")
# NOTE(review): the C++ version reads the column "Reweighted Net Value" from
# this same file — confirm which header name the CSV actually uses.
# pct_change() leaves NaN in the first row, which dropna() removes.
returns = data['Net weighted value'].pct_change().dropna()
sharpe_ratio = ep.sharpe_ratio(returns)
print("Calculated sharpe_ratio: ", sharpe_ratio)
end = time.perf_counter()
# perf_counter() reports seconds; convert the difference to milliseconds.
print(f"Python reads the data and then calculates the Sharpe rate. The total time spent is: {(end - start)*1000.0} ms")