I used the Arrow C++ library to read fund net asset value (NAV) data from a CSV file and then calculated the Sharpe ratio. Embarrassingly, the Arrow C++ version takes more time to run than the Python empyrical version.
I am new to Arrow, and this is the first time I have implemented this function myself. It is most likely not the most efficient implementation, but it is my first step toward rewriting backtrader with Arrow C++. The plan is:
- Rewrite empyrical with Arrow as a practice exercise; the goal is to end up with two files: empyrical.h and empyrical.cpp
- Rewrite pyfolio with Arrow and Qt to get a better-looking interface, packaged as two files: pyfolio.h and pyfolio.cpp
- Rewrite backtrader
c++ version of the file:
my_example.cc
#include <arrow/api.h>
#include <arrow/io/api.h>
#include "arrow/csv/api.h"
#include <arrow/compute/api.h>
#include <iostream>
#include <chrono>
//#include "../empyrical/empyrical.h"

// Reads the fund NAV CSV, derives simple period-over-period returns from the
// "Reweighted Net Value" column, and prints the annualized Sharpe ratio
// (mean / sample stddev * sqrt(252)) together with the elapsed wall-clock time.
//
// Returns a non-OK arrow::Status when the file cannot be opened or parsed, when
// the NAV column is missing, when there are too few rows to compute a sample
// standard deviation, or when any compute kernel fails.
arrow::Status RunMain() {
  const auto start_time = std::chrono::high_resolution_clock::now();

  // First, we need to set up a readable file object that allows us to point the
  // reader to the correct data on disk.
  std::shared_ptr<arrow::io::ReadableFile> infile;
  // Bind the input file to the fund NAV CSV.
  ARROW_ASSIGN_OR_RAISE(
      infile, arrow::io::ReadableFile::Open("/home/yun/Documents/fund_nav.csv"));

  // (Documentation section: CSV table declaration)
  std::shared_ptr<arrow::Table> csv_table;
  // The CSV reader has multiple objects for different options. For now, we'll
  // use the default values.
  ARROW_ASSIGN_OR_RAISE(
      auto csv_reader,
      arrow::csv::TableReader::Make(arrow::io::default_io_context(), infile,
                                    arrow::csv::ReadOptions::Defaults(),
                                    arrow::csv::ParseOptions::Defaults(),
                                    arrow::csv::ConvertOptions::Defaults()));
  // Read the table.
  ARROW_ASSIGN_OR_RAISE(csv_table, csv_reader->Read());

  // Output the metadata information of the Table
  // std::cout << "Table Metadata:" << std::endl;
  // std::cout << "Number of columns: " << csv_table->num_columns() << std::endl;
  // std::cout << "Number of rows: " << csv_table->num_rows() << std::endl;
  // std::cout << "Schema: " << csv_table->schema()->ToString() << std::endl;

  // Output the data displayed in the Table
  // for (int i = 0; i < csv_table->num_columns(); ++i) {
  //   std::shared_ptr<arrow::ChunkedArray> column = csv_table->column(i);
  //   std::cout << "Column " << i << ": " << column->ToString() << std::endl;
  // }

  // 1. Method to display table information to std::cout
  // std::shared_ptr<arrow::RecordBatch> record_batch;
  // // Perform an operation and return Result
  // arrow::Result<std::shared_ptr<arrow::RecordBatch>> result =
  //     csv_table->CombineChunksToBatch();
  // if (result.ok()) {
  //   record_batch = result.ValueOrDie();
  //   // Use record_batch here
  // } else {
  //   // Handle errors
  //   std::cerr << "Error: " << result.status().ToString() << std::endl;
  // }
  // // arrow::PrettyPrint(*record_batch, 2, &std::cout);
  // arrow::Status status = arrow::PrettyPrint(*record_batch, 2, &std::cout);
  // if (!status.ok()) {
  //   // Handle errors, such as printing error messages
  //   std::cerr << "Error: " << status.ToString() << std::endl;
  // }

  // 2. Method to display table information to std::cout
  // std::cout << csv_table->ToString() << std::endl;

  // 3. Method to display table information to std::cout
  // arrow::Status status = arrow::PrettyPrint(*csv_table, 2, &std::cout);
  // if (!status.ok()) {
  //   // Handle errors, such as printing error messages
  //   std::cerr << "Error: " << status.ToString() << std::endl;
  // }

  // Start calculating the Sharpe ratio
  // std::cout << "There are trading days in a year" << AnnualizationFactors::DAILY << "days" << std::endl;
  // std::cout << DAILY << std::endl;

  // GetColumnByName returns nullptr when no column has this name; fail with a
  // descriptive status instead of dereferencing a null pointer below.
  const std::shared_ptr<arrow::ChunkedArray> raw_nav =
      csv_table->GetColumnByName("Reweighted Net Value");
  if (raw_nav == nullptr) {
    return arrow::Status::KeyError(
        "Column \"Reweighted Net Value\" not found in CSV; schema: ",
        csv_table->schema()->ToString());
  }
  // A sample standard deviation (ddof = 1) needs at least two returns, which in
  // turn needs at least three NAV observations.
  if (raw_nav->length() < 3) {
    return arrow::Status::Invalid(
        "Need at least 3 NAV rows to compute a Sharpe ratio, got ",
        raw_nav->length());
  }

  // CSV type inference may yield an integer column; force float64 so that
  // "divide" is a floating-point division and the final scalar is a DoubleScalar.
  ARROW_ASSIGN_OR_RAISE(arrow::Datum nav_f64,
                        arrow::compute::Cast(raw_nav, arrow::float64()));
  const std::shared_ptr<arrow::ChunkedArray> cum_nav = nav_f64.chunked_array();

  // Calculate the rate of return: (nav[t] - nav[t-1]) / nav[t-1]
  const int64_t n_returns = cum_nav->length() - 1;
  const std::shared_ptr<arrow::ChunkedArray> now_cum_nav = cum_nav->Slice(1, n_returns);
  const std::shared_ptr<arrow::ChunkedArray> pre_cum_nav = cum_nav->Slice(0, n_returns);
  ARROW_ASSIGN_OR_RAISE(
      arrow::Datum fund_diff,
      arrow::compute::CallFunction("subtract", {now_cum_nav, pre_cum_nav}));
  ARROW_ASSIGN_OR_RAISE(
      arrow::Datum fund_returns,
      arrow::compute::CallFunction("divide", {fund_diff, pre_cum_nav}));

  // // Get the result array
  // std::cout << "Datum kind: " << fund_returns.ToString()
  //           << " content type: " << fund_returns.type()->ToString() << std::endl;
  // // std::cout << fund_returns.scalar_as<arrow::DoubleScalar>().value << std::endl;
  // std::cout << fund_returns.chunked_array()->ToString() << std::endl;

  // Calculate Sharpe ratio
  // Create Arrow Double scalar holding the annualization factor (252 periods).
  const double days_of_year_double = 252.0;
  const std::shared_ptr<arrow::Scalar> days_of_year =
      arrow::MakeScalar(days_of_year_double);
  ARROW_ASSIGN_OR_RAISE(arrow::Datum sqrt_year,
                        arrow::compute::CallFunction("sqrt", {days_of_year}));
  ARROW_ASSIGN_OR_RAISE(arrow::Datum avg_return,
                        arrow::compute::CallFunction("mean", {fund_returns}));

  // ddof = 1 selects the sample (n - 1) standard deviation.
  arrow::compute::VarianceOptions variance_options;
  variance_options.ddof = 1;
  ARROW_ASSIGN_OR_RAISE(
      arrow::Datum avg_std,
      arrow::compute::CallFunction("stddev", {fund_returns}, &variance_options));

  ARROW_ASSIGN_OR_RAISE(
      arrow::Datum daily_sharpe_ratio,
      arrow::compute::CallFunction("divide", {avg_return, avg_std}));
  ARROW_ASSIGN_OR_RAISE(
      arrow::Datum sharpe_ratio,
      arrow::compute::CallFunction("multiply", {daily_sharpe_ratio, sqrt_year}));

  // A null result (e.g. every return was null) carries no meaningful value;
  // report it rather than printing an unspecified number.
  if (!sharpe_ratio.scalar()->is_valid) {
    return arrow::Status::Invalid("Sharpe ratio is null; check the NAV column for missing values");
  }

  std::cout << "The calculated Sharpe ratio: "
            << sharpe_ratio.scalar_as<arrow::DoubleScalar>().value << std::endl;

  const auto end_time = std::chrono::high_resolution_clock::now();
  const auto duration =
      std::chrono::duration_cast<std::chrono::microseconds>(end_time - start_time);
  std::cout << "c++ reads the data and then calculates the Sharpe rate. The total time spent is: "
            << duration.count() / 1000.0 << " ms" << std::endl;
  return arrow::Status::OK();
}

// (Documentation part: main function)
// Runs the example and maps a failed Status to exit code 1.
int main() {
  const arrow::Status st = RunMain();
  if (!st.ok()) {
    std::cerr << st << std::endl;
    return 1;
  }
  return 0;
}
CMakeLists.txt
# Build script for the my_example Arrow program.
# Each command must be on its own line: CMake command invocations are
# terminated by a newline.
cmake_minimum_required(VERSION 3.16)

project(MyExample)

# NOTE(review): my_example.cc only includes Arrow core/io/csv/compute headers;
# Parquet and ArrowDataset look unnecessary here -- confirm before removing.
find_package(Arrow REQUIRED)
find_package(Parquet REQUIRED)
find_package(ArrowDataset REQUIRED)

add_executable(my_example my_example.cc)
target_link_libraries(my_example
  PRIVATE
    Arrow::arrow_shared
    Parquet::parquet_shared
    ArrowDataset::arrow_dataset_shared)
In the same folder, run
# Configure into ./build, compile, then run the resulting binary.
cmake -B build
cmake --build build
./build/my_example
The python running code is as follows:
import pandas as pd
import empyrical as ep
import time

# Time the whole pipeline: CSV load, return calculation, and Sharpe ratio.
a = time.perf_counter()

data = pd.read_csv("/home/yun/Documents/fund_nav.csv")
# NOTE(review): the C++ example reads the column "Reweighted Net Value";
# confirm which name matches the actual CSV header.
# pct_change() leaves a NaN in the first row, which dropna() removes.
returns = data['Net weighted value'].pct_change().dropna()
sharpe_ratio = ep.sharpe_ratio(returns)
print("Calculated sharpe_ratio: ", sharpe_ratio)

b = time.perf_counter()
print(f"Python reads the data and then calculates the Sharpe rate. The total time spent is: {(b-a)*1000.0} ms")