Tuesday, December 20, 2022

Studying C++ neural networks

■I built a matrix-based neural network.
My references were:
10.1: Introduction to Neural Networks - The Nature of Code https://www.youtube.com/watch?v=XJ7HLz9VYz0

C++ neural networks from scratch - Pt 1. building a matrix library  https://www.lyndonduong.com/linalg-cpp/




Here is my first attempt, written just to get something working.

[Source code]


#include <cstdio>
#include <cmath>
#include <cstdlib>
#include <vector>
#include <random>
#include <future>
#include <functional>
#include <algorithm>

#include <iostream>
#include <fstream>
#include <string>







class Matrix
{
public:
	static Matrix matmul( const Matrix& a, const Matrix& b )
	{
		Matrix ret( a.m_rows, b.m_cols );

		if ( a.m_cols == b.m_rows )
		{
			for ( int y = 0; y < ret.m_rows; y++ )
			{
				for ( int x = 0; x < ret.m_cols; x++ )
				{
					double sum = 0;
					for ( int z = 0; z < a.m_cols; ++z )
					{
						sum += a.m_values[y][z] * b.m_values[z][x];
					}

					ret.m_values[y][x] = sum;
				}
			}
		}
		else
		{
			std::printf("%s - miss match matrix.[%dx%d]x[%dx%d]\n", __FUNCSIG__, a.m_rows, a.m_cols, b.m_rows, b.m_cols );
		}

		return ret;
	}



	static Matrix sub( const Matrix& a, const Matrix& b )
	{
		Matrix ret( a.m_rows, a.m_cols );
		if ( a.m_cols == b.m_cols &&
			a.m_rows == b.m_rows )
		{
			for ( int y = 0; y < ret.m_rows; y++ )
			{
				for ( int x = 0; x < ret.m_cols; x++ )
				{
					ret.m_values[y][x] = a.m_values[y][x] - b.m_values[y][x];
				}
			}
		}
		
		return ret;
	}

	static Matrix createFromArray( const std::vector<double>& array )
	{
		Matrix ret( array.size(), 1 );
		for ( int i = 0; i<array.size(); ++i )
		{
			ret.m_values[i][0] = array[i];
		}

		return ret;
	}

	static Matrix transpose( const Matrix& m )
	{
		Matrix ret( m.m_cols, m.m_rows );
		for ( int y = 0; y < ret.m_rows; ++y )
		{
			for ( int x = 0; x < ret.m_cols; ++x )
			{
				ret.m_values[y][x] = m.m_values[x][y];
			}
		}

		return ret;
	}


public:
	Matrix( int rows, int cols )
		: m_rows( rows )
		, m_cols( cols )
	{
		m_values.resize( rows );
		for ( auto& vec : m_values )
		{
			vec.resize( cols );
		}
	}

	Matrix( const Matrix& m )
		: m_rows( m.m_rows )
		, m_cols( m.m_cols )
		, m_values( m.m_values )
	{
	}

	Matrix& operator = ( const Matrix& m )
	{
		m_values = m.m_values;
		m_rows = m.m_rows;
		m_cols = m.m_cols;
		
		return *this;
	}

	double& getValue( int x, int y )
	{
		return m_values[y][x];
	}

	const double& getValue( int x, int y ) const
	{
		return m_values[y][x];
	}

	void randomizeN()
	{
		std::random_device rndDev;
		std::mt19937 random( rndDev() );
		std::uniform_real_distribution<double> dist( 0, 1.0 / std::sqrt( static_cast<double>(size()) ) );
		for ( auto& v : m_values )
		{
			for ( auto& d : v )
			{
				d = dist( random );
			}
		}
	}

	void multiplyElementwise( const Matrix& m )
	{
		for ( int y = 0; y < m_rows; ++y )
		{
			for ( int x = 0; x < m_cols; ++x )
			{
				m_values[y][x] *= m.m_values[y][x];
			}
		}
	}


	void multiplyScalar( double n )
	{
		for ( auto& r : m_values )
		{
			for ( auto& c : r )
			{
				c *= n;
			}
		}
	}



	void add( double n )
	{
		for ( auto& r : m_values )
		{
			for ( auto& c : r )
			{
				c += n;
			}
		}
	}

	void add( const Matrix& m )
	{
		if ( m_rows != m.m_rows || m_cols != m.m_cols ) return;
		
		for ( int y = 0; y < m_rows; ++y )
		{
			for ( int x = 0; x < m_cols; ++x )
			{
				m_values[y][x] += m.m_values[y][x];
			}
		}
	}

	Matrix& operator + ( const Matrix& m )
	{
		add( m );
		return *this;
	}
	

	void map( std::function<double(const double&)> fn )
	{
		for ( int y = 0; y < m_rows; ++y )
		{
			for ( int x = 0; x < m_cols; ++x )
			{
				m_values[y][x] = fn( m_values[y][x] );
			}
		}
	}

	void print()
	{
		std::printf("%s - rows: %d, cols: %d\n", __FUNCSIG__, m_rows, m_cols );
		for ( int y = 0; y < m_values.size(); ++y )
		{
			for ( int x = 0; x < m_values[y].size(); ++x )
			{
				std::printf("%f, ", m_values[y][x]);
			}

			std::printf( "\n" );
		}
	}

	int size() const
	{
		return m_rows * m_cols;
	}

	int m_rows;
	int m_cols;
	std::vector<std::vector<double>> m_values;
};



















class NeuralNetwork
{
public:
	static double sigmoid( const double& d )
	{
		return 1.0 / (1.0 + std::exp( -d ));
	}

	static double leaklyRelu( const double& d )
	{
		return std::max( d * 0.01, d );
		//return std::min( std::max( d * 0.01, d ), 6.0 );
	}


	// note: d is assumed to already be the sigmoid output, i.e. sigmoid(x),
	// so sigmoid(x) * (1 - sigmoid(x)) simplifies to d * (1 - d)
	static double dsigmoid( const double& d )
	{
		return d * (1.0 - d);
	}

	static double dleaklyRelu( const double& d )
	{
		return (d >= 0.0) ? 1.0 : 0.01;
	}


	static double tanh( const double& d )
	{
		return std::tanh( d );
	}

	static double dtanh( const double& d )
	{
		return 1.0 - std::pow( std::tanh( d ), 2.0 );
	}

public:
	NeuralNetwork( int input, int hidden, int output )
	{
		create( input, hidden, output );
	}



	void create( int input, int hidden, int output )
	{
		m_weights.push_back( Matrix( hidden, input ) );
		m_weights.push_back( Matrix( output, hidden ) );
		for ( auto& m : m_weights )
		{
			m.randomizeN();
		}

		m_bias.push_back( Matrix( hidden, 1 ) );
		m_bias.push_back( Matrix( output, 1 ) );
		for ( auto& m : m_bias )
		{
			m.randomizeN();
		}

		m_output.resize( m_bias.size(), Matrix(output, 1) );
	}

	Matrix foward_prop( const Matrix& input )
	{
		Matrix m = input;

		for ( int i = 0; i < m_weights.size(); ++i )
		{
			m = Matrix::matmul( m_weights[i], m ) + m_bias[i];
			m.map( leaklyRelu );
			m_output[i] = m;
		}

		// apply oneHot to the final layer's output only
		oneHot( m );
		m_output.back() = m;

		return m;
	}


	void step( Matrix& m )
	{
		for ( int y = 0; y < m.m_rows; ++y )
		{
			for ( int x = 0; x < m.m_cols; ++x )
			{
				m.getValue( x, y ) = m.getValue( x, y ) > 0.5 ? 1 : 0;
			}
		}
	}

	// zero the matrix and set only its largest element to 1 (argmax as a hard one-hot)
	void oneHot( Matrix& m )
	{
		int mostTopX = -1;
		int mostTopY = -1;
		double mostTopValue = -100;

		for ( int y = 0; y < m.m_rows; ++y )
		{
			for ( int x = 0; x < m.m_cols; ++x )
			{
				double& d = m.getValue( x, y );

				if ( d > mostTopValue )
				{
					mostTopValue = d;
					mostTopX = x;
					mostTopY = y;
				}

				d = 0;
			}
		}


		if ( mostTopX >= 0 && mostTopX < m.m_cols &&
			mostTopY >= 0 && mostTopY < m.m_rows )
		{
			m.getValue( mostTopX, mostTopY ) = 1;
		}
		else
		{
			std::printf("%s - err. couldnt find most top idx\n", __FUNCSIG__);
		}
	}


	// [Reference] part 4 of the video series
	void train( const std::vector<std::pair<Matrix, Matrix>>& trainingDataList, double l_rate, int epoch )
	{
		// randomize the training order
		auto trainingDataListShuffle = trainingDataList;
		std::random_device randDev;
		std::mt19937 randMt( randDev() );

		for ( int e = 0; e < epoch; ++e )
		{
			// shuffle the training order each epoch
			std::shuffle( trainingDataListShuffle.begin(), trainingDataListShuffle.end(), randMt );

			double sumError = 0;
			for ( auto& trainingData : trainingDataListShuffle )
			{
				// forward
				Matrix m = foward_prop( trainingData.first );

				

				// error
				{
					Matrix em = Matrix::sub( trainingData.second, m );
					em.map( []( const double& d ) -> double { return d * d; } );
					for ( int k = 0; k < em.m_rows; ++k )
					{
						sumError += em.getValue( 0, k );
					}
				}


				// training
				{
					Matrix err = Matrix::sub( trainingData.second, m_output.back() );

					for ( int i=m_weights.size()-1; i>=0; i--)
					{
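						// delta rule for this layer:
						//   gradients  = l_rate * (err elementwise-multiplied by f'(output))
						//   weightGrad = gradients * transpose(previous layer's output)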
						
						const Matrix tsWeight = Matrix::transpose( m_weights[i] );
						Matrix prevError = Matrix::matmul( tsWeight, err );		//< error to hand back to the previous layer (the next loop iteration)
						
						Matrix dOutput = m_output[i];
						dOutput.map( dleaklyRelu );
						Matrix gradients = err;
						gradients.multiplyElementwise( dOutput );
						gradients.multiplyScalar( l_rate );

						
						const Matrix tsOutput = Matrix::transpose( i>0 ? m_output[i-1] : trainingData.first );
						const Matrix weightGrad = Matrix::matmul( gradients, tsOutput );
					
						m_bias[i].add( gradients );
						m_weights[i].add( weightGrad );

						err = prevError;
					}

				}


			}

			std::printf("[%d] %f\n", e, sumError);
		}
	}



	void print()
	{
		std::printf( "%s\n", __FUNCSIG__ );
		std::printf( "weights\n" );
		for ( int i = 0; i < m_weights.size(); ++i )
		{
			const Matrix& m = m_weights[i];
			for ( int y = 0; y < m.m_rows; ++y )
			{
				for ( int x = 0; x < m.m_cols; ++x )
				{
					std::printf( "[layer:%d][%d][%d]%f\n", i, y, x, m.getValue(x,y) );
				}
			}
		}

		std::printf( "bias\n" );
		for ( int i = 0; i < m_bias.size(); ++i )
		{
			const Matrix& m = m_bias[i];
			for ( int y = 0; y < m.m_rows; ++y )
			{
				for ( int x = 0; x < m.m_cols; ++x )
				{
					std::printf( "[layer:%d][%d][%d]%f\n", i, y, x, m.getValue( x, y ) );
				}
			}
		}
	}


	bool saveWeights( const char* fileName ) const
	{
		std::ofstream outputFile( fileName );
		if ( outputFile.is_open() )
		{
			// save weights
			for ( auto& wm : m_weights )
			{
				for ( int y = 0; y < wm.m_rows; ++y )
				{
					for ( int x = 0; x < wm.m_cols; ++x )
					{
						auto weightStr = std::to_string( wm.getValue( x, y ) );
						outputFile << weightStr << ",";
					}
				}
			}

			// save bias
			for ( auto& bm : m_bias )
			{
				for ( int y = 0; y < bm.m_rows; ++y )
				{
					for ( int x = 0; x < bm.m_cols; ++x )
					{
						auto weightStr = std::to_string( bm.getValue( x, y ) );
						outputFile << weightStr << ",";
					}
				}
			}


			outputFile.close();
			return true;
		}

		return false;
	}

	bool loadWeights( const char* fileName )
	{
		if ( fileName )
		{
			std::vector<double> weightsList;

			// read weights file
			{
				std::ifstream inputFile( fileName );

				if ( inputFile.is_open() )
				{
					std::string str;
					while ( std::getline( inputFile, str, ',' ) )
					{
						double d = std::stod( str );
						weightsList.push_back( d );
					}
				}
				inputFile.close();
			}

			if ( weightsList.empty() ) return false;

			// update weights
			int idx = 0;
			for ( auto& wm : m_weights )
			{
				for ( int y = 0; y < wm.m_rows; ++y )
				{
					for ( int x = 0; x < wm.m_cols; ++x )
					{
						if ( weightsList.size() <= idx ) return false;
						wm.getValue( x, y ) = weightsList[idx];
						idx++;
					}
				}
			}

			// update bias
			for ( auto& bm : m_bias )
			{
				for ( int y = 0; y < bm.m_rows; ++y )
				{
					for ( int x = 0; x < bm.m_cols; ++x )
					{
						if ( weightsList.size() <= idx ) return false;
						bm.getValue( x, y ) = weightsList[idx];
						idx++;
					}
				}
			}


			return true;
		}

		return false;
	}



//private:
public:
	std::vector<Matrix> m_weights;
	std::vector<Matrix> m_bias;
	std::vector<Matrix> m_output;
};




















// [Reference] http://www.htmq.com/color/colorname.shtml
// 138 colors
struct OriginalColorData
{
	double r, g, b;
	const char* name;
} g_originalColorData[] = {
	{ 255, 255, 255, "white" },
	{ 245,245,245, "whitesmoke" },
	{ 248,248,255, "ghostwhite" },
	{ 240,248,255, "aliceblue" },
	{ 230,230,250, "lavender" },
	{ 240,255,255, "azure" },
	{ 224,255,255, "lightcyan" },
	{ 245,255,250, "mintcream" },
	{ 240,255,240, "honeydew" },
	{ 255,255,240, "ivory" },
	{ 245,245,220, "beige" },
	{ 255,255,224, "lightyellow" },
	{ 250,250,210, "lightgoldenrodyellow" },
	{ 255,250,205, "lemonchiffon" },
	{ 255,250,240, "floralwhite" },
	{ 253,245,230, "oldlace" },
	{ 255,248,220, "cornsilk" },
	{ 255,239,213, "papayawhite" },
	{ 255,235,205, "blanchedalmond" },
	{ 255,228,196, "bisque" },
	{ 255,250,250, "snow" },
	{ 250,240,230, "linen" },
	{ 250,235,215, "antiquewhite" },
	{ 255,245,238, "seashell" },
	{ 255,240,245, "lavenderblush" },
	{ 255,228,225, "mistyrose" },
	{ 220,220,220, "gainsboro" },
	{ 211,211,211, "lightgray" },
	{ 176,196,222, "lightsteelblue" },
	{ 173,216,230, "lightblue" },
	{ 135,206,250, "lightskyblue" },
	{ 176,224,230, "powderblue" },
	{ 175,238,238, "paleturquoise" },
	{ 135,206,235, "skyblue" },
	{ 102,205,170, "mediumaquamarine" },
	{ 127,255,212, "aquamarine" },
	{ 152,251,152, "palegreen" },
	{ 144,238,144, "lightgreen" },
	{ 240,230,140, "khaki" },
	{ 238,232,170, "palegoldenrod" },
	{ 255,228,181, "moccasin" },
	{ 255,222,173, "navajowhite" },
	{ 255,218,185, "peachpuff" },
	{ 245,222,179, "wheat" },
	{ 255,192,203, "pink" },
	{ 255,182,193, "lightpink" },
	{ 216,191,216, "thistle" },
	{ 221,160,221, "plum" },
	{ 192,192,192, "silver" },
	{ 169,169,169, "darkgray" },
	{ 119,136,153, "lightslategray" },
	{ 112,128,144, "slategray" },
	{ 106,90,205, "slateblue" },
	{ 70,130,180, "steelblue" },
	{ 123,104,238, "mediumslateblue" },
	{ 65,105,225, "royalblue" },
	{ 0,0,255, "blue" },
	{ 30,144,255, "dodgerblue" },
	{ 100,149,237, "cornflowerblue" },
	{ 0,191,255, "deepskyblue" },
	{ 0,255,255, "cyan(aqua)" },
	{ 64,224,208, "turquoise" },
	{ 72,209,204, "mediumturquoise" },
	{ 0,206,209, "darkturquoise" },
	{ 32,178,170, "lightseagreen" },
	{ 0,250,154, "mediumspringgreen" },
	{ 0,255,127, "springgreen" },
	{ 0,255,0, "lime" },
	{ 50,205,50, "limegreen" },
	{ 154,205,50, "yellowgreen" },
	{ 124,252,0, "lawngreen" },
	{ 127,255,0, "chartreuse" },
	{ 173,255,47, "greenyellow" },
	{ 255,255,0, "yellow" },
	{ 255,215,0, "gold" },
	{ 255,165,0, "orange" },
	{ 255,140,0, "darkorange" },
	{ 218,165,32, "goldenrod" },
	{ 222,184,135, "burlywood" },
	{ 210,180,140, "tan" },
	{ 244,164,96, "sandybrown" },
	{ 233,150,122, "darksalmon" },
	{ 240,128,128, "lightcoral" },
	{ 250,128,114, "salmon" },
	{ 255,160,122, "lightsalmon" },
	{ 255,127,80, "coral" },
	{ 255,99,71, "tomato" },
	{ 255,69,0, "orangered" },
	{ 255,0,0, "red" },
	{ 255,20,147, "deeppink" },
	{ 255,105,180, "hotpink" },
	{ 219,112,147, "palevioletred" },
	{ 238,130,238, "violet" },
	{ 218,112,214, "orchid" },
	{ 255,0,255, "magenta(fuchsia)" },
	{ 186,85,211, "mediumorchid" },
	{ 153,50,204, "darkorchid" },
	{ 148,0,211, "darkviolet" },
	{ 138,43,226, "blueviolet" },
	{ 147,112,219, "mediumpurple" },
	{ 128,128,128, "gray" },
	{ 0,0,205, "mediumblue" },
	{ 0,139,139, "darkcyan" },
	{ 95,158,160, "cadetblue" },
	{ 143,188,143, "darkseagreen" },
	{ 60,179,113, "mediumseagreen" },
	{ 0,128,128, "teal" },
	{ 34,139,34, "forestgreen" },
	{ 46,139,87, "seagreen" },
	{ 189,183,107, "darkkhaki" },
	{ 205,133,63, "peru" },
	{ 220,20,60, "crimson" },
	{ 205,92,92, "indianred" },
	{ 188,143,143, "rosybrown" },
	{ 199,21,133, "mediumvioletred" },
	{ 105,105,105, "dimgray" },
	{ 0,0,0, "black" },
	{ 25,25,112, "midnightblue" },
	{ 72,61,139, "darkslateblue" },
	{ 0,0,139, "darkblue" },
	{ 0,0,128, "navy" },
	{ 47,79,79, "darkslategray" },
	{ 0,128,0, "green" },
	{ 0,100,0, "darkgreen" },
	{ 85,107,47, "darkolivegreen" },
	{ 107,142,35, "olivedrab" },
	{ 128,128,0, "olive" },
	{ 184,134,11, "darkgoldenrod" },
	{ 210,105,30, "chocolate" },
	{ 160,82,45, "sienna" },
	{ 139,69,19, "saddlebrown" },
	{ 178,34,34, "firebrick" },
	{ 165,42,42, "brown" },
	{ 128,0,0, "maroon" },
	{ 139,0,0, "darkred" },
	{ 139,0,139, "darkmagenta" },
	{ 128,0,128, "purple" },
	{ 75,0,130, "indigo" },
};


static void normalizeColors()
{
	for ( auto& data : g_originalColorData )
	{
		data.r /= 255;
		data.g /= 255;
		data.b /= 255;
	}
}





static std::vector<std::pair<Matrix, Matrix>> createTrainingDataList()
{
	std::vector< std::pair<Matrix, Matrix> > ret;

	for ( int i = 0; i < _countof( g_originalColorData ); ++i )
	{
		// input
		std::pair<Matrix, Matrix> data( Matrix( 1, 1 ), Matrix( 1, 1 ) );
		data.first = Matrix::createFromArray( std::vector<double>( { g_originalColorData[i].r, g_originalColorData[i].g, g_originalColorData[i].b } ) );

		// output
		std::vector<double> tmp( _countof( g_originalColorData ) );
		tmp[i] = 1;
		data.second = Matrix::createFromArray( tmp );

		ret.push_back( data );
	}

	return ret;
}





static int getMostTopIdx( const Matrix& nnOutputs )
{
	int mostTopRateIdx = -1;
	double mostTopRate = 0;
	
	for ( int i = 0; i < nnOutputs.m_rows; ++i )
	{
		auto value = nnOutputs.getValue( 0, i );

		if ( value > mostTopRate )
		{
			mostTopRate = value;
			mostTopRateIdx = i;
		}
	}

	return mostTopRateIdx;
}





int main()
{
	normalizeColors();

	NeuralNetwork nn( 3, _countof(g_originalColorData), _countof( g_originalColorData ) );
	nn.loadWeights( "weights.txt" );
	const auto trainingDataList = createTrainingDataList();


	for (;;)
	{
		
		nn.train( trainingDataList, 0.05, 5000 );

		for ( auto& data : trainingDataList )
		{
			auto output = nn.foward_prop( data.first );

			const int idx = getMostTopIdx( output );
			if ( idx >= 0 && idx < _countof( g_originalColorData ) )
			{
				const int r = static_cast<int>(g_originalColorData[idx].r * 255);
				const int g = static_cast<int>(g_originalColorData[idx].g * 255);
				const int b = static_cast<int>(g_originalColorData[idx].b * 255);
				const int r2 = data.first.getValue( 0, 0 ) * 255;
				const int g2 = data.first.getValue( 0, 1 ) * 255;
				const int b2 = data.first.getValue( 0, 2 ) * 255;
				std::printf( "answer is %d,%d,%d, %s ( correct: %d, %d, %d )\n", r, g, b, g_originalColorData[idx].name, r2, g2, b2 );
			}
			else
			{
				std::printf( "idx error\n" );
			}
		}


		nn.saveWeights( "weights.txt" );
		system( "PAUSE" );
	}

	return 0;
}


Just like the JavaScript neural network I built a while back,
I decided to have it learn HTML color codes.
Surprisingly few training problems of a manageable size come to mind besides XOR, so it can't be helped.



Sometimes the error would balloon during training until the doubles turned into NaN and the run died,
and I never managed to fix that :<.
Since all I need this time is the index of the color code,
I introduced oneHot so that things still work even when the learning is rough.
Without oneHot, training often stalled partway through; I suspect there simply aren't enough neurons, but I don't know the details.
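
One standard mitigation for that kind of blow-up, which the code above does not do, is to clamp each gradient element before applying it. A minimal sketch against the Matrix class above; clipGradients and limit are illustrative names, not part of the listing:

#include <algorithm>

// Hypothetical gradient-clipping helper (not in the listing above):
// clamping every element keeps one oversized step from snowballing into NaN.
static void clipGradients( Matrix& gradients, double limit )
{
	gradients.map( [limit]( const double& d ) -> double
	{
		return std::max( -limit, std::min( limit, d ) );
	} );
}

// usage inside train(), just before m_weights[i].add( weightGrad ):
//   clipGradients( weightGrad, 1.0 );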


As it stood, it was far too slow to be usable, so here is a version with some rough optimizations.



[Source code]



#include <cstdio>
#include <cmath>
#include <cstdlib>
#include <cstring>
#include <cfloat>
#include <vector>
#include <random>
#include <future>
#include <functional>
#include <algorithm>

#include <iostream>
#include <fstream>
#include <string>








class Matrix
{
public:



	static Matrix matmul( const Matrix& a, const Matrix& b )
	{
		Matrix ret( a.m_rows, b.m_cols );
		matmul( ret, a, b );
		return ret;
	}


	// does not work correctly if output aliases a or b
	static void matmul( Matrix& output, const Matrix& a, const Matrix& b )
	{
		if ( a.m_cols == b.m_rows )
		{
			output.resize( a.m_rows, b.m_cols );
			output.zero();

			double* pOutStart = output.m_values.data();
			const double* pAStart = a.m_values.data();
			const double* pBStart = b.m_values.data();
			const int addOutY = output.m_cols;
			const int addAY = a.m_cols;
			const int addBY = b.m_cols;

			const int yMax = output.m_rows;
			const int xMax = output.m_cols;
			const int zMax = a.m_cols;
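			// loop order y-z-x: the innermost x loop walks pOut and pB
			// sequentially, which is the cache-friendly order discussed
			// at the end of this post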
			for ( int y = 0; y < yMax; y++ )
			{
				const double* pA = pAStart;
				const double* pBStart2 = pBStart;
				for ( int z = 0; z < zMax; ++z )
				{
					double* pOut = pOutStart;
					const double* pB = pBStart2;
					for ( int x = 0; x < xMax; x++ )
					{
						*pOut += *pA * *pB;

						pOut++;
						pB++;
					}
					pA++;
					pBStart2 += addBY;
				}
				pOutStart += addOutY;
				pAStart += addAY;

			}
		}
		else
		{
			std::printf( "%s - miss match matrix.[%dx%d]x[%dx%d]\n", __FUNCSIG__, a.m_rows, a.m_cols, b.m_rows, b.m_cols );
		}
	}


	// does not work correctly if out aliases a or b
	// multiplies with a as-is and b treated as transposed
	static void matmul_nt( Matrix& out, const Matrix& a, const Matrix& b )
	{
		const int b_rows = b.m_cols;
		const int b_cols = b.m_rows;

		if ( a.m_cols == b_rows )
		{
			out.resize( a.m_rows, b_cols );
			out.zero();


			double* pOutStart = out.m_values.data();
			const double* pAStart = a.m_values.data();
			const double* pBStart = b.m_values.data();
			const int addOutY = out.m_cols;
			const int addAY = a.m_cols;
			const int addBY = b.m_cols;

			const int yMax = out.m_rows;
			const int xMax = out.m_cols;
			const int zMax = a.m_cols;
			for ( int y = 0; y < yMax; y++ )
			{
				const double* pA = pAStart;
				const double* pBStart2 = pBStart;
				for ( int z = 0; z < zMax; ++z )
				{
					double* pOut = pOutStart;
					const double* pB = pBStart2;
					for ( int x = 0; x < xMax; x++ )
					{
						*pOut += *pA * *pB;

						pOut++;
						pB+=addBY;
					}
					pA++;
					pBStart2++;
				}
				pOutStart += addOutY;
				pAStart += addAY;

			}
		}
		else
		{
			std::printf( "%s - miss match matrix.[%dx%d]x[%dx%d]\n", __FUNCSIG__, a.m_rows, a.m_cols, b.m_rows, b.m_cols );
		}
	}


	// does not work correctly if out aliases a or b
	// multiplies with a treated as transposed and b as-is
	static void matmul_tn( Matrix& out, const Matrix& a, const Matrix& b )
	{
		const int a_rows = a.m_cols;
		const int a_cols = a.m_rows;

		
		if ( a_cols == b.m_rows )
		{
			out.resize( a_rows, b.m_cols );
			out.zero();


			double* pOutStart = out.m_values.data();
			const double* pAStart = a.m_values.data();
			const double* pBStart = b.m_values.data();
			const int addOutY = out.m_cols;
			const int addAY = a.m_cols;
			const int addBY = b.m_cols;

			const int yMax = out.m_rows;
			const int xMax = out.m_cols;
			const int zMax = a_cols;
			for ( int y = 0; y < yMax; y++ )
			{
				const double* pA = pAStart;
				const double* pBStart2 = pBStart;
				for ( int z = 0; z < zMax; ++z )
				{
					double* pOut = pOutStart;
					const double* pB = pBStart2;
					for ( int x = 0; x < xMax; x++ )
					{
						*pOut += *pA * *pB;

						pOut++;
						pB += addBY;
					}
					pA += addAY;
					pBStart2++;
				}
				pOutStart += addOutY;
				pAStart++;

			}
		}
		else
		{
			std::printf( "%s - miss match matrix.[%dx%d]x[%dx%d]\n", __FUNCSIG__, a.m_rows, a.m_cols, b.m_rows, b.m_cols );
		}
	}

#if 0
	static Matrix sub( const Matrix& a, const Matrix& b )
	{
		Matrix ret( a.m_rows, a.m_cols );
		if ( a.m_cols == b.m_cols &&
			a.m_rows == b.m_rows )
		{
			double* pOut = ret.m_values.data();
			const double* pA = a.m_values.data();
			const double* pB = b.m_values.data();
			const int n = ret.size();
			for ( int i = 0; i < n; ++i )
			{
				*pOut = *pA - *pB;

				pOut++;
				pA++;
				pB++;
			}
		}
		else
		{
			std::printf("%s - not same size[%d,%d]-[%d,%d]\n", __FUNCSIG__, a.m_rows, a.m_cols, b.m_rows, b.m_cols );
		}

		return ret;
	}
#endif

	static void sub( Matrix& out, const Matrix& a, const Matrix& b )
	{
		if ( a.m_cols == b.m_cols &&
			a.m_rows == b.m_rows )
		{
			out.resize( a.m_rows, a.m_cols );

			double* pOut = out.m_values.data();
			const double* pA = a.m_values.data();
			const double* pB = b.m_values.data();
			const int n = out.size();
			for ( int i = 0; i < n; ++i )
			{
				*pOut = *pA - *pB;

				pOut++;
				pA++;
				pB++;
			}
		}
		else
		{
			std::printf( "%s - not same size[%d,%d]-[%d,%d]\n", __FUNCSIG__, a.m_rows, a.m_cols, b.m_rows, b.m_cols );
		}
	}

	static Matrix createFromArray( const std::vector<double>& array )
	{
		Matrix ret( array.size(), 1 );
		const double* pArray = array.data();
		const int arraySize = array.size();
		for ( int i = 0; i < arraySize; ++i )
		{
			ret.getValue(0,i) = *pArray;
			pArray++;
		}

		return ret;
	}



	// unused, but kept around just in case
	static Matrix transpose( const Matrix& m )
	{
		Matrix ret( m.m_cols, m.m_rows );

		double* pOutStart = ret.m_values.data();
		const double* pInStart = m.m_values.data();
		const int addOutY = ret.m_cols;
		const int addInY = m.m_cols;
		for ( int y = 0; y < ret.m_rows; ++y )
		{
			double* pOut = pOutStart;
			const double* pIn = pInStart;
			for ( int x = 0; x < ret.m_cols; ++x )
			{
				*pOut = *pIn;

				pOut++;
				pIn += addInY;
			}

			pOutStart += addOutY;
			pInStart++;
		}

		return ret;
	}



public:
	Matrix( int rows, int cols )
		: m_rows( rows )
		, m_cols( cols )
	{
		m_values.resize( rows * cols );
	}

	Matrix( const Matrix& m )
		: m_rows( m.m_rows )
		, m_cols( m.m_cols )
		, m_values( m.m_values )
	{
	}

	Matrix& operator = ( const Matrix& m )
	{
		resize( m.m_rows, m.m_cols );
		memcpy( m_values.data(), m.m_values.data(), m.size() * sizeof( double ) );

		return *this;
	}

	void resize( int rows, int cols )
	{
		if ( m_rows != rows ||
			m_cols != cols )
		{
			if ( m_values.size() < rows * cols )
			{
				m_values.resize( rows * cols );
			}

			m_rows = rows;
			m_cols = cols;
		}
	}

	double& getValue( int x, int y )
	{
		return m_values[y * m_cols + x];
	}

	const double& getValue( int x, int y ) const
	{
		return m_values[y * m_cols + x];
	}

	
	void randomizeN()
	{
		std::random_device rndDev;
		std::mt19937 random( rndDev() );
		//std::mt19937 random( 0 );
		std::uniform_real_distribution<double> dist( 0, 1.0 / std::sqrt(static_cast<double>(size())) );
		for ( auto& v : m_values )
		{
			v = dist( random );
		}
	}

	void multiplyElementwise( const Matrix& m )
	{
		if ( m_rows != m.m_rows || m_cols != m.m_cols )
		{
			std::printf( "%s - not same size[%d,%d]-[%d,%d]\n", __FUNCSIG__, m_rows, m_cols, m.m_rows, m.m_cols );
			return;
		}

		double* pOut = m_values.data();
		const double* pIn = m.m_values.data();
		const int n = size();
		for ( int i = 0; i < n; ++i )
		{
			*pOut *= *pIn;
			pOut++;
			pIn++;
		}
	}


	void multiplyScalar( double d )
	{

		double* pOut = m_values.data();
		int n = size();
		for ( int i = 0; i < n; ++i )
		{
			*pOut *= d;
			pOut++;
		}
	}

	void add( double d )
	{
		double* pOut = m_values.data();
		int n = size();
		for ( int i = 0; i < n; ++i )
		{
			*pOut += d;

			

			pOut++;
		}

	}

	void add( const Matrix& m )
	{
		if ( m_rows != m.m_rows || m_cols != m.m_cols )
		{
			std::printf( "%s - not same size[%d,%d]-[%d,%d]\n", __FUNCSIG__, m_rows, m_cols, m.m_rows, m.m_cols );
			return;
		}

		double* pOut = m_values.data();
		const double* pIn = m.m_values.data();
		int n = size();
		for ( int i = 0; i < n; ++i )
		{
			*pOut += *pIn;
			pOut++;
			pIn++;
		}

	}

	Matrix& operator + ( const Matrix& m )
	{
		add( m );
		return *this;
	}


	void map( std::function<double( const double& )> fn )
	{
		double* pOut = m_values.data();
		const int n = size();
		for ( int i = 0; i < n; ++i )
		{
			*pOut = fn( *pOut );
			pOut++;
		}

	}

	void print() const
	{
		std::printf( "%s - rows: %d, cols: %d\n", __FUNCSIG__, m_rows, m_cols );
		for ( int y = 0; y < m_rows; ++y )
		{
			for ( int x = 0; x < m_cols; ++x )
			{
				std::printf( "%f, ", getValue(x,y) );
			}

			std::printf( "\n" );
		}
	}



	void zero()
	{
		memset( m_values.data(), 0, m_values.size() * sizeof( double ) );
	}

	int size() const
	{
		return m_rows * m_cols;
	}

	int m_rows;
	int m_cols;
	std::vector<double> m_values;
};














class NeuralNetwork
{
public:
	static double sigmoid( const double& d )
	{
		return 1.0 / (1.0 + std::exp( -d ));
	}

	static double leaklyRelu( const double& d )
	{
		return std::max( d * 0.01, d );
		//return std::min( std::max( d * 0.01, d ), 6.0 );
	}


	// note: d is assumed to already be the sigmoid output, i.e. sigmoid(x),
	// so sigmoid(x) * (1 - sigmoid(x)) simplifies to d * (1 - d)
	static double dsigmoid( const double& d )
	{
		return d * (1.0 - d);
	}

	static double dleaklyRelu( const double& d )
	{
		return (d >= 0.0) ? 1.0 : 0.01;
	}


	static double tanh( const double& d )
	{
		return std::tanh( d );
	}

	static double dtanh( const double& d )
	{
		return 1.0 - std::pow( std::tanh( d ), 2.0 );
	}

public:
	NeuralNetwork( int input, int hidden, int output )
	{
		create( input, hidden, output );
	}

	NeuralNetwork( int input, int hidden, int hidden2, int output )
	{
		create( input, hidden, hidden2, output );
	}

	void create( int input, int hidden, int output )
	{
		m_weights.push_back( Matrix( hidden, input ) );
		m_weights.push_back( Matrix( output, hidden ) );
		for ( auto& m : m_weights )
		{
			m.randomizeN();
		}

		m_bias.push_back( Matrix( hidden, 1 ) );
		m_bias.push_back( Matrix( output, 1 ) );
		for ( auto& m : m_bias )
		{
			m.randomizeN();
		}

		m_output.resize( m_bias.size(), Matrix(output, 1) );
	}

	void create( int input, int hidden, int hidden2, int output )
	{
		m_weights.push_back( Matrix( hidden, input ) );
		m_weights.push_back( Matrix( hidden2, hidden ) );
		m_weights.push_back( Matrix( output, hidden2 ) );
		for ( auto& m : m_weights )
		{
			m.randomizeN();
		}

		m_bias.push_back( Matrix( hidden, 1 ) );
		m_bias.push_back( Matrix( hidden2, 1 ) );
		m_bias.push_back( Matrix( output, 1 ) );
		for ( auto& m : m_bias )
		{
			m.randomizeN();
		}

		m_output.resize( m_bias.size(), Matrix( output, 1 ) );

	}


	Matrix foward_prop( const Matrix& input )
	{
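		// pM starts out aliasing the const input; it is only read through until it
		// is repointed at m_output, so writing through it later on is safe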
		Matrix* pM = const_cast<Matrix*>(&input);
		Matrix* pOut = m_output.data();
		Matrix* pWeights = m_weights.data();
		Matrix* pBias = m_bias.data();
		const int n = m_weights.size();
		for ( int i = 0; i < n; ++i )
		{
			Matrix::matmul( *pOut, *pWeights, *pM );
			pOut->add( *pBias );
			pOut->map( leaklyRelu );

			pM = pOut;
			pOut++;
			pWeights++;
			pBias++;
		}

		oneHot( *pM );

		return *pM;
	}


	void step( Matrix& m )
	{
		for ( int i = 0; i < m.size(); ++i )
		{
			m.m_values[i] =  m.m_values[i] > 0.5 ? 1 : 0;
		}
	}

	// zero the matrix and set only its largest element to 1 (argmax as a hard one-hot)
	void oneHot( Matrix& m )
	{
		int mostTopIdx = -1;
		double mostTopValue = -DBL_MAX;	// note: -DBL_MIN is a tiny value near zero; we need the most negative double here
		for ( int i = 0; i < m.size(); ++i )
		{
			double& d = m.m_values[i];
			if ( d > mostTopValue )
			{
				mostTopValue = d;
				mostTopIdx = i;
			}

			d = 0;
		}

		if ( mostTopIdx >= 0 && mostTopIdx < m.size() )
		{
			m.m_values[mostTopIdx] = 1;
		}
		else
		{
			std::printf("%s - err. couldnt find most top idx\n", __FUNCSIG__);
		}
	}


	// [Reference] part 4 of the video series
	void train( const std::vector<std::pair<Matrix, Matrix>>& trainingDataList, double l_rate, int epoch )
	{
		// keep scratch matrices outside the loop so allocations happen as rarely as possible
		Matrix dOutput( 1, 1 );
		Matrix prevError( 1, 1 );
		Matrix gradients( 1, 1 );
		Matrix weightGrad( 1, 1 );
		Matrix err( 1, 1 );

		// randomize the training order
		auto trainingDataListShuffle = trainingDataList;
		std::random_device randDev;
		std::mt19937 randMt( randDev() );
		//std::mt19937 randMt( 0 );

		for ( int e = 0; e < epoch; ++e )
		{
			std::shuffle( trainingDataListShuffle.begin(), trainingDataListShuffle.end(), randMt );

			double sumError = 0;
			for ( auto& trainingData : trainingDataListShuffle )
			{
				// forward
				foward_prop( trainingData.first );


				// calc error
				Matrix::sub( err, trainingData.second, m_output.back() );

				// sumError
				for ( int i = 0; i < err.m_rows; ++i )
				{
					const double d = err.getValue( 0, i );
					sumError += d * d;
				}
				

				// training
				for ( int i=m_weights.size()-1; i>=0; i--)
				{
					Matrix::matmul_tn( prevError, m_weights[i], err );		//< error to hand back to the previous layer (the next loop iteration)


					dOutput = m_output[i];
					dOutput.map( dleaklyRelu );
					/*
					Matrix& gradients = err;	//< the copy here was surprisingly heavy; I only wanted a clearer name
					gradients.multiplyElementwise( dOutput );
					gradients.multiplyScalar( l_rate );

					Matrix::matmul_nt( weightGrad, gradients, i > 0 ? m_output[i - 1] : trainingData.first );

					m_bias[i].add( gradients );
					m_weights[i].add( weightGrad );
					*/
					err.multiplyElementwise( dOutput );
					err.multiplyScalar( l_rate );

					Matrix::matmul_nt( weightGrad, err, i > 0 ? m_output[i - 1] : trainingData.first );

					m_bias[i].add( err );
					m_weights[i].add( weightGrad );
						
					err = prevError;
				}
			}

			std::printf("[%d] %f\n", e, sumError);
		}
	}



	void print()
	{
		std::printf( "%s\n", __FUNCSIG__ );
		std::printf( "weights\n" );
		for ( int i = 0; i < m_weights.size(); ++i )
		{
			const Matrix& m = m_weights[i];
			for ( int y = 0; y < m.m_rows; ++y )
			{
				for ( int x = 0; x < m.m_cols; ++x )
				{
					std::printf( "[layer:%d][%d][%d]%f\n", i, y, x, m.getValue(x,y) );
				}
			}
		}

		std::printf( "bias\n" );
		for ( int i = 0; i < m_bias.size(); ++i )
		{
			const Matrix& m = m_bias[i];
			for ( int y = 0; y < m.m_rows; ++y )
			{
				for ( int x = 0; x < m.m_cols; ++x )
				{
					std::printf( "[layer:%d][%d][%d]%f\n", i, y, x, m.getValue(x,y) );
				}
			}
		}
	}


	bool saveWeights( const char* fileName ) const
	{
		std::ofstream outputFile( fileName );
		if ( outputFile.is_open() )
		{
			// save weights
			for ( auto& m : m_weights )
			{
				for ( auto& d : m.m_values )
				{
					auto weightStr = std::to_string( d );
					outputFile << weightStr << ",";
				}
			}

			// save bias
			for ( auto& m : m_bias )
			{
				for ( auto& d : m.m_values )
				{
					auto weightStr = std::to_string( d );
					outputFile << weightStr << ",";
				}
			}


			outputFile.close();
			return true;
		}

		return false;
	}

	bool loadWeights( const char* fileName )
	{
		if ( fileName )
		{
			std::vector<double> weightsList;

			// read weights file
			{
				std::ifstream inputFile( fileName );

				if ( inputFile.is_open() )
				{
					std::string str;
					while ( std::getline( inputFile, str, ',' ) )
					{
						double d = std::stod( str );
						weightsList.push_back( d );
					}
				}
				inputFile.close();
			}

			if ( weightsList.empty() ) return false;

			// update weights
			int idx = 0;
			for ( auto& m : m_weights )
			{
				for ( auto& d : m.m_values )
				{
					if ( weightsList.size() <= idx ) break;
					d = weightsList[idx++];
				}
			}

			// update bias
			for ( auto& m : m_bias )
			{
				for ( auto& d : m.m_values )
				{
					if ( weightsList.size() <= idx ) break;
					d = weightsList[idx++];
				}
			}


			return true;
		}

		return false;
	}



//private:
public:
	std::vector<Matrix> m_weights;
	std::vector<Matrix> m_bias;
	std::vector<Matrix> m_output;
};




















// [Reference] http://www.htmq.com/color/colorname.shtml
// 138 colors
struct OriginalColorData
{
	double r, g, b;
	const char* name;
} g_originalColorData[] = {
	{ 255, 255, 255, "white" },
	{ 245,245,245, "whitesmoke" },
	{ 248,248,255, "ghostwhite" },
	{ 240,248,255, "aliceblue" },
	{ 230,230,250, "lavender" },
	{ 240,255,255, "azure" },
	{ 224,255,255, "lightcyan" },
	{ 245,255,250, "mintcream" },
	
	{ 240,255,240, "honeydew" },
	{ 255,255,240, "ivory" },
	{ 245,245,220, "beige" },
	{ 255,255,224, "lightyellow" },
	{ 250,250,210, "lightgoldenrodyellow" },
	{ 255,250,205, "lemonchiffon" },
	{ 255,250,240, "floralwhite" },
	{ 253,245,230, "oldlace" },
	{ 255,248,220, "cornsilk" },
	{ 255,239,213, "papayawhite" },
	{ 255,235,205, "blanchedalmond" },
	
	{ 255,228,196, "bisque" },
	{ 255,250,250, "snow" },
	{ 250,240,230, "linen" },
	{ 250,235,215, "antiquewhite" },
	{ 255,245,238, "seashell" },
	{ 255,240,245, "lavenderblush" },
	{ 255,228,225, "mistyrose" },
	{ 220,220,220, "gainsboro" },
	{ 211,211,211, "lightgray" },
	{ 176,196,222, "lightsteelblue" },
	{ 173,216,230, "lightblue" },
	{ 135,206,250, "lightskyblue" },
	
	{ 176,224,230, "powderblue" },
	{ 175,238,238, "paleturquoise" },
	{ 135,206,235, "skyblue" },
	{ 102,205,170, "mediumaquamarine" },
	{ 127,255,212, "aquamarine" },
	{ 152,251,152, "palegreen" },
	{ 144,238,144, "lightgreen" },
	{ 240,230,140, "khaki" },
	{ 238,232,170, "palegoldenrod" },
	{ 255,228,181, "moccasin" },
	{ 255,222,173, "navajowhite" },
	{ 255,218,185, "peachpuff" },
	{ 245,222,179, "wheat" },
	{ 255,192,203, "pink" },
	{ 255,182,193, "lightpink" },
	{ 216,191,216, "thistle" },
	{ 221,160,221, "plum" },
	{ 192,192,192, "silver" },
	{ 169,169,169, "darkgray" },
	{ 119,136,153, "lightslategray" },
	{ 112,128,144, "slategray" },
	{ 106,90,205, "slateblue" },

	//*
	{ 70,130,180, "steelblue" },
	{ 123,104,238, "mediumslateblue" },
	{ 65,105,225, "royalblue" },
	{ 0,0,255, "blue" },
	{ 30,144,255, "dodgerblue" },
	{ 100,149,237, "cornflowerblue" },
	{ 0,191,255, "deepskyblue" },
	{ 0,255,255, "cyan(aqua)" },
	{ 64,224,208, "turquoise" },

	
	{ 72,209,204, "mediumturquoise" },
	{ 0,206,209, "darkturquoise" },
	{ 32,178,170, "lightseagreen" },
	{ 0,250,154, "mediumspringgreen" },
	{ 0,255,127, "springgreen" },
	{ 0,255,0, "lime" },
	{ 50,205,50, "limegreen" },
	{ 154,205,50, "yellowgreen" },
	{ 124,252,0, "lawngreen" },
	{ 127,255,0, "chartreuse" },
	{ 173,255,47, "greenyellow" },
	{ 255,255,0, "yellow" },
	{ 255,215,0, "gold" },
	{ 255,165,0, "orange" },
	{ 255,140,0, "darkorange" },
	{ 218,165,32, "goldenrod" },
	{ 222,184,135, "burlywood" },
	{ 210,180,140, "tan" },
	{ 244,164,96, "sandybrown" },
	{ 233,150,122, "darksalmon" },
	{ 240,128,128, "lightcoral" },
	{ 250,128,114, "salmon" },
	{ 255,160,122, "lightsalmon" },
	{ 255,127,80, "coral" },
	{ 255,99,71, "tomato" },
	{ 255,69,0, "orangered" },
	{ 255,0,0, "red" },
	{ 255,20,147, "deeppink" },
	{ 255,105,180, "hotpink" },
	{ 219,112,147, "palevioletred" },
	{ 238,130,238, "violet" },
	{ 218,112,214, "orchid" },
	{ 255,0,255, "magenta(fuchsia)" },
	{ 186,85,211, "mediumorchid" },
	{ 153,50,204, "darkorchid" },
	{ 148,0,211, "darkviolet" },
	{ 138,43,226, "blueviolet" },
	{ 147,112,219, "mediumpurple" },
	{ 128,128,128, "gray" },
	{ 0,0,205, "mediumblue" },
	{ 0,139,139, "darkcyan" },
	{ 95,158,160, "cadetblue" },
	{ 143,188,143, "darkseagreen" },
	{ 60,179,113, "mediumseagreen" },
	{ 0,128,128, "teal" },
	{ 34,139,34, "forestgreen" },
	{ 46,139,87, "seagreen" },
	{ 189,183,107, "darkkhaki" },
	{ 205,133,63, "peru" },
	{ 220,20,60, "crimson" },
	{ 205,92,92, "indianred" },
	{ 188,143,143, "rosybrown" },
	{ 199,21,133, "mediumvioletred" },
	{ 105,105,105, "dimgray" },
	{ 0,0,0, "black" },
	{ 25,25,112, "midnightblue" },
	{ 72,61,139, "darkslateblue" },
	{ 0,0,139, "darkblue" },
	{ 0,0,128, "navy" },
	{ 47,79,79, "darkslategray" },
	{ 0,128,0, "green" },
	{ 0,100,0, "darkgreen" },
	{ 85,107,47, "darkolivegreen" },
	{ 107,142,35, "olivedrab" },
	{ 128,128,0, "olive" },
	{ 184,134,11, "darkgoldenrod" },
	{ 210,105,30, "chocolate" },
	{ 160,82,45, "sienna" },
	{ 139,69,19, "saddlebrown" },
	{ 178,34,34, "firebrick" },
	{ 165,42,42, "brown" },
	{ 128,0,0, "maroon" },
	{ 139,0,0, "darkred" },
	{ 139,0,139, "darkmagenta" },
	{ 128,0,128, "purple" },
	{ 75,0,130, "indigo" },
	//*/
};


static void normalizeColors()
{
	for ( auto& data : g_originalColorData )
	{
		data.r /= 255;
		data.g /= 255;
		data.b /= 255;
	}
}





static std::vector<std::pair<Matrix, Matrix>> createTrainingDataList()
{
	std::vector< std::pair<Matrix, Matrix> > ret;

	for ( int i = 0; i < _countof( g_originalColorData ); ++i )
	{
		// input
		std::pair<Matrix, Matrix> data( Matrix( 1, 1 ), Matrix( 1, 1 ) );
		data.first = Matrix::createFromArray( std::vector<double>( { g_originalColorData[i].r, g_originalColorData[i].g, g_originalColorData[i].b } ) );

		// output
		std::vector<double> tmp( _countof( g_originalColorData ) );
		tmp[i] = 1;
		data.second = Matrix::createFromArray( tmp );

		ret.push_back( data );
	}

	return ret;
}





static int getMostTopIdx( const Matrix& nnOutputs )
{
	int mostTopRateIdx = -1;
	double mostTopRate = 0;
	
	for ( int i = 0; i < nnOutputs.m_rows; ++i )
	{
		//auto value = nnOutputs.m_values[i][0];
		auto value = nnOutputs.getValue( 0, i );
		if ( value > mostTopRate )
		{
			mostTopRate = value;
			mostTopRateIdx = i;
		}
	}

	return mostTopRateIdx;
}





int main()
{
	normalizeColors();

	NeuralNetwork nn( 3, _countof(g_originalColorData), _countof( g_originalColorData ) );
	nn.loadWeights( "weights.txt" );
	const auto trainingDataList = createTrainingDataList();


	for (;;)
	{
		
		nn.train( trainingDataList, 0.05, 5000 );

		for ( auto& data : trainingDataList )
		{
			auto output = nn.foward_prop( data.first );
			//output.print();
			const int idx = getMostTopIdx( output );
			if ( idx >= 0 && idx < _countof( g_originalColorData ) )
			{
				const int r = static_cast<int>(g_originalColorData[idx].r * 255);
				const int g = static_cast<int>(g_originalColorData[idx].g * 255);
				const int b = static_cast<int>(g_originalColorData[idx].b * 255);
				const int r2 = data.first.getValue( 0, 0 ) * 255;
				const int g2 = data.first.getValue( 0, 1 ) * 255;
				const int b2 = data.first.getValue( 0, 2 ) * 255;
				std::printf( "answer is %d,%d,%d, %s ( correct: %d, %d, %d )\n", r, g, b, g_originalColorData[idx].name, r2, g2, b2 );
			}
			else
			{
				std::printf( "idx error\n" );
			}
		}

		//nn.print();
		nn.saveWeights( "weights.txt" );
		system( "PAUSE" );
	}

	return 0;
}




The optimization strategy:
・Changed Matrix from a 2D array to a 1D array so it sits in cache better. This follows the reference site almost verbatim.
・Rewrote the address calculations in the matrix multiply as pointer walks, eliminating multiplications wherever possible. Multiplication and division are fundamentally expensive, so I want as few as possible inside inner loops.
・Reordered the loops of the matrix multiply into an order that is reportedly more cache-friendly, as in the version below.

static Matrix matmul( const Matrix& a, const Matrix& b )
{
	Matrix ret( a.m_rows, b.m_cols );

	if ( a.m_cols == b.m_rows )
	{

		for ( int y = 0; y < ret.m_rows; y++ )
		{
			for ( int z = 0; z < a.m_cols; ++z )
			{
				for ( int x = 0; x < ret.m_cols; x++ )
				{
					ret.getValue( x, y ) += a.getValue( z, y ) * b.getValue( x, z );
				}
			}
		}
	}

	return ret;
}
 The z and x loops are swapped. The pointer-heavy version further above is this same loop with the address math unrolled. Running it, it did indeed get a bit faster.
・Isn't the transpose step unnecessary?
 Thinking so, I wrote dedicated multiply routines instead; they swap the row/column address arithmetic inside the multiplication itself (a quick equivalence check is sketched just below).
・Allocating memory for matrices is expensive, so temporaries are reused and only reallocated when the existing buffer is too small.
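
As a sanity check on those transpose-free routines: matmul_tn( out, a, b ) should produce the same result as transposing a first and multiplying normally. A small test sketch, assuming the optimized Matrix class above is in scope (checkTransposedMultiply is an illustrative name):

#include <cstdio>
#include <cmath>

// verify matmul_tn against transpose + matmul on random matrices
static void checkTransposedMultiply()
{
	Matrix a( 3, 2 );	// transpose(a) is 2x3
	Matrix b( 3, 4 );	// so transpose(a) * b is 2x4
	a.randomizeN();
	b.randomizeN();

	Matrix viaTranspose( 1, 1 );
	Matrix::matmul( viaTranspose, Matrix::transpose( a ), b );

	Matrix direct( 1, 1 );
	Matrix::matmul_tn( direct, a, b );

	// the two results should match element for element
	for ( int y = 0; y < direct.m_rows; ++y )
		for ( int x = 0; x < direct.m_cols; ++x )
			if ( std::fabs( direct.getValue( x, y ) - viaTranspose.getValue( x, y ) ) > 1e-12 )
				std::printf( "mismatch at [%d][%d]\n", y, x );
}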



In the end, an existing library would be both faster and more general-purpose, so I decided not to dig any deeper. Using the GPU or multiple threads are also options; one multithreading sketch follows.
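
The row loop of matmul parallelizes naturally, since each output row is written independently. A minimal sketch using std::async (the listings already include <future>); matmulParallel and numTasks are illustrative names, not part of the code above:

#include <future>
#include <vector>
#include <algorithm>

// split the output rows of out = a * b across worker tasks;
// each task owns a disjoint range of rows, so no locking is needed
static void matmulParallel( Matrix& out, const Matrix& a, const Matrix& b, int numTasks )
{
	out.resize( a.m_rows, b.m_cols );
	out.zero();

	const int rowsPerTask = (a.m_rows + numTasks - 1) / numTasks;
	std::vector<std::future<void>> tasks;
	for ( int t = 0; t < numTasks; ++t )
	{
		const int yBegin = t * rowsPerTask;
		const int yEnd = std::min( yBegin + rowsPerTask, a.m_rows );
		tasks.push_back( std::async( std::launch::async, [&, yBegin, yEnd]()
		{
			// same y-z-x loop order as the single-threaded version
			for ( int y = yBegin; y < yEnd; ++y )
				for ( int z = 0; z < a.m_cols; ++z )
					for ( int x = 0; x < b.m_cols; ++x )
						out.getValue( x, y ) += a.getValue( z, y ) * b.getValue( x, z );
		} ) );
	}

	for ( auto& f : tasks ) f.get();	// wait for every row range to finish
}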