Thursday, June 11, 2020

C++ example: custom class type used in unordered_map

To use a custom class type as a key in an unordered_map, we need to overload the == operator and define a custom hash function for the type. See the example below.

#include <iostream>
#include <string>
#include <unordered_map>

using namespace std;

class Student{
public:
    Student(const string& name, int age):name(name), age(age){}

    friend bool operator==(const Student& lhs, const Student& rhs){
        return lhs.name == rhs.name && lhs.age == rhs.age;
    }

    void output() const{
        cout<<"Name "<<name<<" Age "<<age<<endl;
    }

public:
    string name;
    int age;
};

struct hashStudent{
    std::size_t operator()(const Student& stu) const{
        return hash<string>()(stu.name) ^ (hash<int>()(stu.age)<<1); // combine the member hashes
    }
};

int main(){
    unordered_map<Student, int, hashStudent> m;
    m.emplace(Student{"John", 21}, 4);
    m.emplace(Student{"John", 12}, 3);
    m.emplace(Student{"Ann", 24}, 2);

    for(const auto& [key, val]: m){
        cout<<key.name<<" "<<key.age<<" "<<val<<endl;
    }
    return 0;
}
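
As an alternative (a sketch not taken from the original post, reusing the Student class above), std::hash can be specialized for Student; the third template argument of unordered_map can then be omitted.

namespace std {
    template<>
    struct hash<Student> {
        size_t operator()(const Student& stu) const {
            // combine the member hashes, as hashStudent does above
            return hash<string>()(stu.name) ^ (hash<int>()(stu.age) << 1);
        }
    };
}

// With the specialization visible, the default hash is used:
// unordered_map<Student, int> m2;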

Saturday, June 6, 2020

C++ priority queue custom comparator

std::priority_queue is a max-heap by default: with the default std::less comparator, the largest element according to operator< sits on top. To use a custom class as the element type, either define operator< for it or pass a comparator as the third template parameter. The five methods below show the options.

Method 1: non-member (friend) operator<

#include <iostream>
#include <string>
#include <vector>
#include <queue>

using namespace std;

class Student{
public:
	Student(const string& name, int age):name(name), age(age){}

	friend bool operator<(const Student& lhs, const Student& rhs){
		if(lhs.name == rhs.name){
			return lhs.age < rhs.age;
		}
		return lhs.name < rhs.name;		
	}

	void output() const{
		cout<<"Name "<<name<<" Age "<<age<<endl;
	}

public:
	string name;
	int age;
};

int main(){
	priority_queue<Student> q;
	q.push(Student{"John", 21});
	q.push(Student{"John", 12});
	q.push(Student{"Ann", 24});

	while(!q.empty()){
		q.top().output();
		q.pop();
	}
	return 0;
}

Method 2: member operator<

#include <iostream>
#include <string>
#include <vector>
#include <queue>

using namespace std;

class Student{
public:
	Student(const string& name, int age):name(name), age(age){}

	bool operator<(const Student& other) const{
		if(name == other.name){
			return age < other.age;
		}
		return name < other.name;
	}

	void output() const{
		cout<<"Name "<<name<<" Age "<<age<<endl;
	}

public:
	string name;
	int age;
};

int main(){
	priority_queue<Student> q;
	q.push(Student{"John", 21});
	q.push(Student{"John", 12});
	q.push(Student{"Ann", 24});

	while(!q.empty()){
		q.top().output(); 
		q.pop();
	}
	return 0;
}

Method 3: comparator struct (function object)

#include <iostream>
#include <string>
#include <vector>
#include <queue>

using namespace std;

class Student{
public:
	Student(const string& name, int age):name(name), age(age){}

	void output() const{
		cout<<"Name "<<name<<" Age "<<age<<endl;
	}

public:
	friend struct comp;

	string name;
	int age;
};

struct comp{
	bool operator()(const Student& lhs, const Student& rhs) const{
		if(lhs.name == rhs.name){
			return lhs.age < rhs.age;
		}
		return lhs.name < rhs.name;				
	}
};

int main(){
	priority_queue<Student, vector<Student>, comp> q;
	q.push(Student{"John", 21});
	q.push(Student{"John", 12});
	q.push(Student{"Ann", 24});

	while(!q.empty()){
		q.top().output(); 
		q.pop();
	}
	return 0;
}

Method 4*: free comparison function with std::function

#include <iostream>
#include <string>
#include <vector>
#include <queue>
#include <functional>

using namespace std;

class Student{
public:
	Student(const string& name, int age):name(name), age(age){}

	void output() const{
		cout<<"Name "<<name<<" Age "<<age<<endl;
	}

public:
	string name;
	int age;
};

bool comp(const Student& lhs, const Student& rhs){
	if(lhs.name == rhs.name){
		return lhs.age < rhs.age;
	}
	return lhs.name < rhs.name;				
}

int main(){
	priority_queue<Student, vector<Student>, std::function<bool(const Student&, const Student&)>> q(comp);
	q.push(Student{"John", 21});
	q.push(Student{"John", 12});
	q.push(Student{"Ann", 24});

	while(!q.empty()){
		q.top().output(); 
		q.pop();
	}
	return 0;
}

Method 5: lambda comparator with decltype

#include <iostream>
#include <string>
#include <vector>
#include <queue>

using namespace std;

class Student{
public:
	Student(const string& name, int age):name(name), age(age){}

	void output() const{
		cout<<"Name "<<name<<" Age "<<age<<endl;
	}

public:
	string name;
	int age;
};

int main(){

	auto comp = [](const Student& lhs, const Student& rhs)->bool{
		if(lhs.name == rhs.name){
			return lhs.age < rhs.age;
		}
		return lhs.name < rhs.name;	
	};

	priority_queue<Student, vector<Student>, decltype(comp)> q(comp);
	q.push(Student{"John", 21});
	q.push(Student{"John", 12});
	q.push(Student{"Ann", 24});

	while(!q.empty()){
		q.top().output(); 
		q.pop();
	}
	return 0;
}

Thursday, May 28, 2020

C++ keywords, explicit, default, delete, noexcept, override, and final

explicit
Prevents a constructor from being used for implicit conversions.

for example

explicit Array(int size);

This code prevents implicit conversion from int to the Array class.
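
To see the effect, a hypothetical usage sketch (Array is the same illustrative class):

Array a1(5);     // OK: direct initialization still works
Array a2 = 5;    // error: implicit conversion from int is blocked by explicit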

default
Lets the compiler generate the default implementation of a special member function.
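
A minimal illustration (Array is again just an illustrative class name):

class Array{
public:
    Array(int size);               // user-declared constructor suppresses the default one
    Array() = default;             // ask the compiler to generate it anyway
    Array(const Array&) = default; // explicitly request the compiler-generated copy constructor
};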

delete
Removes a function so that any use of it is a compile-time error; commonly used to suppress compiler-generated special member functions.

Array(const Array&) = delete;

For example, the above code prevents the compiler from automatically creating the copy constructor.
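
With the copy constructor deleted, copying no longer compiles (an illustrative sketch):

Array a1(5);
Array a2 = a1;   // error: use of deleted function 'Array::Array(const Array&)'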

noexcept
Declares that a function is not expected to throw, which lets the compiler optimize the surrounding code. It does not guarantee that no exception is thrown inside the function; if one escapes, std::terminate is called.
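
A minimal illustration (swapValues is just a made-up function name):

void swapValues(int& a, int& b) noexcept {  // promises not to throw
    int tmp = a;
    a = b;
    b = tmp;
}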

override
Marks a virtual function in a derived class as overriding a base-class function; the compiler reports an error if no base function with the same signature exists, which prevents accidental overrides with different parameters.
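
A small sketch (Base and Derived are illustrative names):

struct Base{
    virtual void print(int x) const {}
};

struct Derived : Base{
    void print(int x) const override {}     // OK: matches the base signature
    // void print(double x) override {}     // error: does not override anything
};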

final
Disallows inheritance from a class, or further overriding of a virtual function.
for example

struct Base final{
};

The Base struct cannot be inherited from.
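
final can also be applied to a virtual function so it cannot be overridden further down the hierarchy (Widget is an illustrative name):

struct Widget{
    virtual void draw() final {}   // draw cannot be overridden in derived classes
};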

Thursday, May 21, 2020

R Parallel Writing to Files

Here I will use the foreach and doParallel libraries to demonstrate how to write to a file in parallel.

library(stringr)
library(flock)
library(foreach)
library(doParallel)
cl <- makeCluster(detectCores(), outfile = "a.out")
registerDoParallel(cl)
lock0 <-tempfile()
foreach (
  i = 1:10000,
  .combine = cbind,
  .packages = c('stringr', 'flock'),
  .export = ls(globalenv())
) %dopar% {
  locked0 <- flock::lock(lock0)
  write(i, file = "outfile.txt", append = TRUE)
  flock::unlock(locked0)
}
stopCluster(cl)

The makeCluster(detectCores(), outfile = "a.out") statement makes a cluster using all available cores, and the console output is redirected to the file a.out.

The statement registerDoParallel(cl) registers the cluster as the foreach parallel backend.
Note that in the foreach call we have .packages = c('stringr', 'flock') and .export = ls(globalenv()). The former loads the specified packages on the workers and the latter exports all variables declared in the global environment to the loop. Without these, the code inside the foreach loop cannot see the outside libraries or variables.

To avoid data races when multiple processes/threads write to the same file, we use the flock library as a mutex and wrap the write operation in flock::lock and flock::unlock.

Using a mutex can make the processing very slow. An alternative is to have each process write to its own separate file, for example by including the process id in the file name:

write(i,file=paste(c("outfile",Sys.getpid(),".txt"), collapse =""),append=TRUE)

One thing to note is that, if your parallel processing includes database connections, the above code will fail since the worker processes cannot inherit the database connections. You can use the code below to initialize the connections when building the cluster, using clusterEvalQ.

library(RODBC)   #use the ODBC library
library(DBI)
library(odbc)
odbcCloseAll()
library(foreach)
library(doParallel)
cl <- makeCluster(detectCores(), outfile = "a.out")
clusterEvalQ(cl, {
   library(odbc)
   library(RODBC)
   library(DBI)
   dbname1 = "test"  # change this when change server!!!
   channel1 = RODBC::odbcConnect(dbname1)
   con1 <- DBI::dbConnect(odbc(),  dbname1)
})
registerDoParallel(cl)

Visual Studio 2017 SSIS project incompatible

Open Visual Studio 2017 and select Tools -> Extensions and Updates. Click Online in the left pane and search for "Microsoft Reporting Services Projects", then click Install. You need to close Visual Studio to let the installation begin. When it is done, go to Tools -> Extensions and Updates again, click Installed in the left pane, search for "Microsoft Reporting Services Projects", and click Enable.
Right-click the incompatible project and click Reload. This should solve the problem.

Monday, May 18, 2020

C++ avoid arithmetic operation on size_t type

Here is a simple test program on this topic:

#include <iostream>
#include <string>
#include <typeinfo>

using namespace std;

int main(){
    string s = "a";
    int i = 0;
    cout<<i<<" "<<typeid(i).name()<<endl;
    cout<<s.length()<<" "<<typeid(s.length()).name()<<endl;
    cout<<i - s.length()<<" "<<typeid(i - s.length()).name()<<endl;
    return 0;
}

The output is a very large number (18446744073709551615) instead of -1 as intended (see below).

0 i
1 m
18446744073709551615 m

The type of s.length() is size_t, which is an unsigned integer type (unsigned int or unsigned long depending on the platform). In the expression i - s.length(), the int operand is converted to that unsigned type, so the result wraps around to a huge value instead of -1.
To avoid this kind of problem, store the length in a signed variable first, for example int n = s.length();, and then use that variable in the calculations.
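
A minimal sketch of the safer pattern, using the same example as above with the length stored in a signed variable first:

#include <iostream>
#include <string>

using namespace std;

int main(){
    string s = "a";
    int i = 0;
    int n = static_cast<int>(s.length());  // signed copy of the length
    cout << i - n << endl;                  // prints -1 as expected
    return 0;
}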

Sunday, May 17, 2020

Is it safe to delete a pointer to nullptr

I did a test on an online compiler with the code below.

#include <iostream>

using namespace std;

int main()
{
    int * a = nullptr;
    delete a;
    return 0;
}

The program compiles and runs with no problem, so it is safe to delete a pointer whose value is nullptr; the standard guarantees that delete on a null pointer does nothing. However, you cannot execute delete nullptr; directly, since nullptr is a null pointer literal (of type std::nullptr_t), not a pointer to an object type.
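
A small sketch (not from the original test) of a common idiom that relies on this guarantee: resetting a pointer to nullptr after delete makes a later delete harmless.

#include <iostream>

using namespace std;

int main()
{
    int* p = new int(42);
    delete p;
    p = nullptr;   // later deletes are now safe no-ops
    delete p;      // OK: deleting a null pointer does nothing
    return 0;
}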

Sunday, May 10, 2020

Solving AWS ParallelCluster Cannot Submit Multiple Node using Slurm + OpenMPI

Recently, I tried out AWS ParallelCluster, which is a Linux-based HPC cluster solution. We use Slurm as the scheduler and OpenMPI for message passing. When submitting jobs to multiple compute nodes, various error messages appear; below is one version of them.

[ip-10-0-19-27][[16152,1],0][btl_tcp_endpoint.c:626:mca_btl_tcp_endpoint_recv_connect_ack] received unexpected process identifier [[16152,1],1]
[ip-10-0-19-27][[16152,1],1][btl_tcp_endpoint.c:626:mca_btl_tcp_endpoint_recv_connect_ack] received unexpected process identifier [[16152,1],0]
[ip-10-0-19-27][[16152,1],2][btl_tcp_endpoint.c:626:mca_btl_tcp_endpoint_recv_connect_ack] received unexpected process identifier [[16152,1],3]
[ip-10-0-19-27][[16152,1],3][btl_tcp_endpoint.c:626:mca_btl_tcp_endpoint_recv_connect_ack] received unexpected process identifier [[16152,1],2]
[ip-10-0-20-194][[16152,1],4][btl_tcp_endpoint.c:626:mca_btl_tcp_endpoint_recv_connect_ack] received unexpected process identifier [[16152,1],5]
[ip-10-0-20-194][[16152,1],5][btl_tcp_endpoint.c:626:mca_btl_tcp_endpoint_recv_connect_ack] received unexpected process identifier [[16152,1],4]
[ip-10-0-20-194][[16152,1],6][btl_tcp_endpoint.c:626:mca_btl_tcp_endpoint_recv_connect_ack] received unexpected process identifier [[16152,1],7]
[ip-10-0-20-194][[16152,1],7][btl_tcp_endpoint.c:626:mca_btl_tcp_endpoint_recv_connect_ack] received unexpected process identifier [[16152,1],6]

It turns out that OpenMPI somehow did not find the right network interface. Adding the --mca btl_tcp_if_include ens3 command-line parameter to mpirun solves the problem. Here ens3 is the default network interface; you can find yours using ifconfig.

Below is a sample submission script.

#!/bin/bash
#SBATCH --job-name=montecarlojob
#SBATCH --ntasks=8
#SBATCH --output=%x_%j.out
module load openmpi
mpirun --mca btl_tcp_if_include ens3 -np 8 a.out

Saturday, May 2, 2020

Conversion between CMakeList and Visual Studio Solution

Convert Visual Studio solution file to CMakeList file

Below is the solution's GitHub repository:
https://github.com/pavelliavonau/cmakeconverter

pip install cmake-converter
cmake-converter -s <path/to/file.sln>

Convert CMakeLists to Visual Studio solution

Below is the solution's GitHub repository:
https://cognitivewaves.wordpress.com/cmake-and-visual-studio/

mkdir _build
cd _build
cmake .. -G "Visual Studio 15 2017 Win64"

Sunday, April 26, 2020

Build googletest on Mac OS (solve -Werror, -Wc++11-extensions)

When I try to build googletest on my MacBook using CMake, the following errors are shown:
error: deleted function definitions are a C++11 extension [-Werror,-Wc++11-extensions]
GTEST_DISALLOW_ASSIGN_(RE);
...
expanded from macro 'GTEST_DISALLOW_COPY_AND_ASSIGN_'
GTEST_DISALLOW_ASSIGN_(type)
expanded from macro 'GTEST_DISALLOW_ASSIGN_'
type& operator=(type const &) = delete
From the error message, it seems that the compiler is not using the correct C++ standard. Using the CMake parameter -DCMAKE_CXX_STANDARD="17" solves the problem. Below are the full commands:
git clone https://github.com/google/googletest
cd googletest/
mkdir build
cd build/
cmake -DCMAKE_CXX_STANDARD="17" ../
make
sudo make install

Using static or extern as global variable in C++

Good practice for using an extern global variable:
Declare the extern variable in a .h file:
extern int outputMonth;

Define and initialize it in one .cpp file, outside of any function (for example, before main):
int outputMonth = 50;

A static variable at namespace scope has internal linkage (extern gives external linkage), so each .cpp file that contains such a definition gets its own copy of the value. It is therefore not recommended as a shared global variable.
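
A minimal sketch of the pattern across translation units (globals.h, globals.cpp, and main.cpp are illustrative file names):

// globals.h
extern int outputMonth;          // declaration only; include this header wherever the variable is used

// globals.cpp
#include "globals.h"
int outputMonth = 50;            // the single definition

// main.cpp
#include "globals.h"
#include <iostream>
int main(){
    std::cout << outputMonth << std::endl;   // prints 50; every file sees the same object
    return 0;
}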

Monday, April 20, 2020

Deploy C++ Program to CentOS Server with Similar Configuration

Here is a simple tutorial that shows how to deploy a C++ program to a Linux server that has a similar architecture and software packages to your development Linux workstation.

  1. Compile your C++ program into an executable.
  2. Collect the shared library dependencies. A shell script (for example, one that parses ldd output) can copy all dependent dynamic libraries into one folder. (Note: you may want to keep only the custom libraries you use and remove the libraries provided by the operating system.)
  3. Copy your C++ program and the library folder to the Linux server you want to deploy on, then use the following command to add the library folder to the dynamic linker search path:
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/your_library_path
  • Note: if you want this setting to be persistent, you need to add this command to your .bashrc or .bash_profile file.

Saturday, April 18, 2020

Build CentOS7 C++ Compiling Environment with MPI and Boost Library

In this tutorial, I will show how to build a C++ compiling environment from a raw CentOS Linux install. Specifically, OpenMPI and the Boost library are included in this environment.

First, use the following command to update the CentOS packages:

yum update -y

Then, group-install the Development Tools, which include gcc:

yum groupinstall "Development Tools" -y

Use the following command to install OpenMPI:

yum install openmpi openmpi-devel -y

Since the default version of CMake in CentOS 7 is rather old, we will install a relatively new version. The commands below install some dependent libraries:

yum install wget -y
yum install openssl-devel -y

Run the below commands to build and install CMake 3.17

wget https://github.com/Kitware/CMake/releases/download/v3.17.0/cmake-3.17.0.tar.gz
tar -zxvf cmake-3.17.0.tar.gz
cd cmake-3.17.0 
./bootstrap --prefix=/usr/local
make && make install

Now we download the Boost Library 1.68.0:

wget https://dl.bintray.com/boostorg/release/1.68.0/source/boost_1_68_0.tar.gz
tar -zxvf boost_1_68_0.tar.gz
cd boost_1_68_0

Since we need the Boost MPI library, make sure you load the OpenMPI module:

source /etc/profile.d/modules.sh
module load mpi/openmpi-x86_64 

Use the below command to compile and install the Boost library:

./bootstrap.sh
echo "using mpi ;" >> project-config.jam
./b2 threading=multi --with-program_options --with-filesystem --with-serialization --with-mpi --with-system install

Note that the "using mpi ;" string is essential to tell the Boost build script to compile the Boost MPI library. If you need other Boost libraries, just add the option --with-xxx where xxx is the library name.

Mission accomplished!

Thursday, April 16, 2020

Simple Facebook Prophet Tutorial: Predicting S&P Index

Stock prices are very difficult to predict, since they are very complex and influenced by many factors that are not easily quantified. In this post, I will just use the Facebook Prophet package to do a simple S&P 500 Index prediction for tutorial purposes. Facebook Prophet is based on an additive model and accounts for non-linear components by including seasonality and holiday effects. The data I use is from Yahoo Finance between 1998-01-01 and 2018-12-31. The majority of the data (1998-01-01 to 2017-12-31) is used as training data and the rest is used as validation.

First, load the necessary libraries:
import numpy as np
import pandas as pd
import matplotlib.pyplot as pyplot
Load the S&P 500 data from Yahoo Finance:
sp500 = pd.read_csv("sp500_yahoo_finance.csv", parse_dates=['Date'])
Let us first look at the data:
sp500.head()

As we can see, the data contains different columns like "Open", "High", "Low", etc., as well as "Volume". Here we use the "Close" column as our prediction target.

In the next step, the raw data is split into training data and test data:

trainSp500 = sp500[sp500.Date<'2018-01-01'][['Date','Close']]
testSp500 = sp500[sp500.Date>='2018-01-01'][['Date','Close']]
The Facebook Prophet library is loaded, and the data is prepared in the format required by Prophet (columns ds and y):
from fbprophet import Prophet
data = pd.DataFrame({'ds':trainSp500['Date'].values, 'y':trainSp500['Close'].values})
All preparation is done. Here we fit the model using the training data:
model = Prophet()
model.fit(data)
We make a one-year prediction.
future = model.make_future_dataframe(365) # forecasting for 1 year
forecast = model.predict(future)
In the end, the following graph shows how good the prediction is. The vertical line represents the separation point between the training and testing data. We can see Prophet did a pretty good job of fitting the training set. However, the prediction on the testing set is quite off, and the big dip is not reflected in the prediction at all.





Saturday, April 11, 2020

Comparison of np.dot and operator *

np.dot calculates the dot product of two arrays. For 2-D arrays, the matrix multiplication result is returned.

The * operator performs element-wise multiplication. It will expand the dimensions (this is called broadcasting) if the shapes do not match.

The best way to explain this is through some actual code


Sunday, April 5, 2020

Docker Machine mapping host drive to container

To share files between the host and a Docker container, we need to map a directory on the host to a path in the container, using the below option with docker run:

-v path_in_the_host:path_in_the_container

This option works on a Linux host, but does not directly work with Docker Machine. One easy workaround is to put your folder under C:\Users on your Windows host, and then use the below command:

docker run -dit -v /c/Users/folder_name:/mnt/data --name container_name image_name

After this, your folder "folder_name" in C:\Users will be accessible in the container through /mnt/data.