Skip to content
This repository was archived by the owner on Mar 12, 2020. It is now read-only.

Commit a44ce5d

Browse files
...
1 parent b87e8c3 commit a44ce5d

File tree

8 files changed

+66
-192
lines changed

8 files changed

+66
-192
lines changed

Examples/MNIST/Program.cs

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,12 @@ static void Main(string[] args)
1414
{
1515
Global.UseGpu();
1616

17+
Tensor x = Tensor.FromArray(Global.Device, new float[] { 1, 2, 3, 4, 5, 6, 7, 8, 9 });
18+
x = x.Reshape(3, 3);
19+
20+
var result = TOps.Diag(x);
21+
result.Print();
22+
1723
string datasetFolder = @"C:\dataset\MNIST";
1824
bool useDenseModel = false;
1925

@@ -50,9 +56,9 @@ private static Sequential BuildFCModel()
5056
private static Sequential BuildConvModel()
5157
{
5258
Sequential model = new Sequential();
53-
model.Add(new Conv2D(filters: 16, kernalSize: Tuple.Create<uint, uint>(5, 5), activation: ActType.Sigmoid));
59+
model.Add(new Conv2D(filters: 16, kernalSize: Tuple.Create<uint, uint>(5, 5), activation: ActType.ReLU));
5460
model.Add(new MaxPooling2D(poolSize: Tuple.Create<uint, uint>(2, 2)));
55-
model.Add(new Conv2D(filters: 32, kernalSize: Tuple.Create<uint, uint>(5, 5), activation: ActType.Sigmoid));
61+
model.Add(new Conv2D(filters: 32, kernalSize: Tuple.Create<uint, uint>(5, 5), activation: ActType.ReLU));
5662
model.Add(new MaxPooling2D(poolSize: Tuple.Create<uint, uint>(2, 2)));
5763
//model.Add(new Dropout(0.2f));
5864
model.Add(new Flatten());

ManagedCuda/ManagedCuda.csproj

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323
</PropertyGroup>
2424

2525
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|AnyCPU'">
26-
<DefineConstants>TRACE;WIN,CUDA90,CUDNN7</DefineConstants>
26+
<DefineConstants>TRACE;WIN,CUDA100,CUDNN7</DefineConstants>
2727
<AllowUnsafeBlocks>true</AllowUnsafeBlocks>
2828
</PropertyGroup>
2929

SiaNet.Test/Im2ColTest.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ public void Im2Col_2d()
2424
[TestMethod]
2525
public void DiagTest()
2626
{
27-
//Global.UseGpu();
27+
Global.UseGpu();
2828
Tensor x = Tensor.FromArray(Global.Device, new float[] { 1, 2, 3, 4, 5, 6, 7, 8, 9 });
2929
x = x.Reshape(3, 3);
3030

Tensor/TensorSharp/Cuda/CudaBasicOps.cs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
using System.Linq;
1717
using System.Text;
1818
using TensorSharp.Core;
19+
using TensorSharp.Cuda.DeviceCode;
1920
using TensorSharp.CUDA.DeviceCode;
2021
using TensorSharp.CUDA.KernelOps;
2122
using TensorSharp.CUDA.MatrixMul;
@@ -61,6 +62,8 @@ public class CudaBasicOps
6162
/// </summary>
6263
private readonly ReduceDimIndexKernels reduceDimIndexKernels = new ReduceDimIndexKernels();
6364

65+
private readonly MatrixOps matrixOps = new MatrixOps();
66+
6467

6568
/// <summary>
6669
/// Initializes a new instance of the <see cref="CudaBasicOps"/> class.
@@ -986,5 +989,11 @@ public Tensor StdAll(Tensor result, Tensor src)
986989
return writeTarget;
987990
}
988991

992+
993+
[RegisterOpStorageType("diag", typeof(CudaStorage))]
994+
public Tensor Diag(Tensor src)
995+
{
996+
return matrixOps.Diag(src);
997+
}
989998
}
990999
}

Tensor/TensorSharp/Cuda/DeviceCode/CU/MatrixOps.c

Lines changed: 24 additions & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -2,69 +2,17 @@
22
// tensor, dimension 'dim' is skipped. The tensors are assumed to have the same
33
// size (with the exception of 't2' in dimension 'dim').
44
// This version uses a static number of dimensions.
5-
template <typename IndexType, int Dims>
6-
struct IndexToScatterGatherOffsets {
7-
static __device__ void compute(
8-
IndexType linearId, const int dim,
9-
const TensorInfo<IndexType>& index, IndexType* indexOffset,
10-
const TensorInfo<IndexType>& t1, IndexType* t1Offset,
11-
const TensorInfo<IndexType>& t2, IndexType* t2Offset) {
12-
for (int d = Dims - 1; d >= 0; d--) {
13-
IndexType curDimIndex = linearId % index.sizes[d];
14-
*indexOffset += curDimIndex * index.strides[d];
15-
*t1Offset += curDimIndex * t1.strides[d];
16-
if (d != dim) {
17-
*t2Offset += curDimIndex * t2.strides[d];
18-
}
19-
linearId /= index.sizes[d];
20-
}
21-
}
22-
23-
static __device__ void compute(
24-
IndexType linearId, const int dim,
25-
const TensorInfo<IndexType>& index, IndexType* indexOffset,
26-
const TensorInfo<IndexType>& t2, IndexType* t2Offset) {
27-
for (int d = Dims - 1; d >= 0; d--) {
28-
IndexType curDimIndex = linearId % index.sizes[d];
29-
*indexOffset += curDimIndex * index.strides[d];
30-
if (d != dim) {
31-
*t2Offset += curDimIndex * t2.strides[d];
32-
}
33-
linearId /= index.sizes[d];
34-
}
35-
}
36-
};
37-
385
// Same as above but using a dynamic number of dimensions.
396
template <typename IndexType>
40-
struct IndexToScatterGatherOffsets<IndexType, -1> {
7+
struct DiagOffsets<IndexType, -1> {
418
static __device__ void compute(
42-
IndexType linearId, const int dim,
43-
const TensorInfo<IndexType>& index, IndexType* indexOffset,
44-
const TensorInfo<IndexType>& t1, IndexType* t1Offset,
45-
const TensorInfo<IndexType>& t2, IndexType* t2Offset) {
46-
for (int d = index.dims - 1; d >= 0; d--) {
47-
IndexType curDimIndex = linearId % index.sizes[d];
48-
*indexOffset += curDimIndex * index.strides[d];
49-
*t1Offset += curDimIndex * t1.strides[d];
50-
if (d != dim) {
51-
*t2Offset += curDimIndex * t2.strides[d];
9+
IndexType linearId, const int dim, const TensorInfo<IndexType>& t, IndexType* tOffset) {
10+
for (int d = t.dims - 1; d >= 0; d--) {
11+
IndexType curDimIndex = linearId % t.sizes[d];
12+
*tOffset += curDimIndex * t.strides[d];
5213
}
53-
linearId /= index.sizes[d];
54-
}
55-
}
5614

57-
static __device__ void compute(
58-
IndexType linearId, const int dim,
59-
const TensorInfo<IndexType>& index, IndexType* indexOffset,
60-
const TensorInfo<IndexType>& t2, IndexType* t2Offset) {
61-
for (int d = index.dims - 1; d >= 0; d--) {
62-
IndexType curDimIndex = linearId % index.sizes[d];
63-
*indexOffset += curDimIndex * index.strides[d];
64-
if (d != dim) {
65-
*t2Offset += curDimIndex * t2.strides[d];
66-
}
67-
linearId /= index.sizes[d];
15+
linearId /= t.sizes[d];
6816
}
6917
}
7018
};
@@ -75,20 +23,27 @@ __global__ void diag_kernel(
7523
TensorInfo<IndexType> tensor,
7624
TensorInfo<IndexType> src,
7725
const IndexType totalElements) {
78-
for (IndexType linearId = blockIdx.x * blockDim.x + threadIdx.x; linearId < totalElements; linearId += gridDim.x * blockDim.x) {
79-
IndexType tensorOffset = 0;
80-
IndexType srcOffset = 0;
81-
IndexType indexOffset = 0;
26+
for (IndexType i = blockIdx.x * blockDim.x + threadIdx.x; i < totalElements; i += gridDim.x * blockDim.x) {
27+
for (IndexType j = blockIdx.x * blockDim.x + threadIdx.x; j < totalElements; j += gridDim.x * blockDim.x) {
28+
IndexType tensorOffset = 0;
29+
IndexType srcOffset = 0;
8230

83-
IndexToScatterGatherOffsets<IndexType, Dims>::compute(linearId, dim,
84-
index, &indexOffset,
85-
tensor, &tensorOffset,
86-
src, &srcOffset);
31+
DiagOffsets<IndexType>::compute(i, dim, tensor, &tensorOffset);
32+
DiagOffsets<IndexType>::compute(i, dim, src, &srcOffset);
8733

88-
IndexType indexValue = (IndexType)index.data[indexOffset];
89-
srcOffset += indexValue * src.strides[dim];
34+
if (i == j)
35+
{
36+
IndexType indexValue = (IndexType)src.data[tensorOffset];
37+
srcOffset += indexValue * src.strides[dim];
9038

91-
tensor.data[tensorOffset] = src.data[srcOffset];
39+
tensor.data[tensorOffset] = src.data[srcOffset];
40+
}
41+
else
42+
{
43+
tensor.data[tensorOffset] = 0;
44+
}
45+
46+
}
9247
}
9348
};
9449

Tensor/TensorSharp/Cuda/DeviceCode/MatrixKernels.cs

Lines changed: 1 addition & 112 deletions
Original file line numberDiff line numberDiff line change
@@ -32,116 +32,6 @@ namespace TensorSharp.CUDA.DeviceCode
3232
[Precompile]
3333
public class MatrixKernels : CudaCode
3434
{
35-
/// <summary>
36-
/// The code
37-
/// </summary>
38-
public static string Code = @"
39-
// Compute the offsets into the given tensors for a linear index. For the 't2'
40-
// tensor, dimension 'dim' is skipped. The tensors are assumed to have the same
41-
// size (with the exception of 't2' in dimension 'dim').
42-
// This version uses a static number of dimensions.
43-
template <typename IndexType, int Dims>
44-
struct IndexToScatterGatherOffsets {
45-
static __device__ void compute(
46-
IndexType linearId, const int dim,
47-
const TensorInfo<IndexType>& index, IndexType* indexOffset,
48-
const TensorInfo<IndexType>& t1, IndexType* t1Offset,
49-
const TensorInfo<IndexType>& t2, IndexType* t2Offset) {
50-
for (int d = Dims - 1; d >= 0; d--) {
51-
IndexType curDimIndex = linearId % index.sizes[d];
52-
*indexOffset += curDimIndex * index.strides[d];
53-
*t1Offset += curDimIndex * t1.strides[d];
54-
if (d != dim) {
55-
*t2Offset += curDimIndex * t2.strides[d];
56-
}
57-
linearId /= index.sizes[d];
58-
}
59-
}
60-
61-
static __device__ void compute(
62-
IndexType linearId, const int dim,
63-
const TensorInfo<IndexType>& index, IndexType* indexOffset,
64-
const TensorInfo<IndexType>& t2, IndexType* t2Offset) {
65-
for (int d = Dims - 1; d >= 0; d--) {
66-
IndexType curDimIndex = linearId % index.sizes[d];
67-
*indexOffset += curDimIndex * index.strides[d];
68-
if (d != dim) {
69-
*t2Offset += curDimIndex * t2.strides[d];
70-
}
71-
linearId /= index.sizes[d];
72-
}
73-
}
74-
};
75-
76-
// Same as above but using a dynamic number of dimensions.
77-
template <typename IndexType>
78-
struct IndexToScatterGatherOffsets<IndexType, -1> {
79-
static __device__ void compute(
80-
IndexType linearId, const int dim,
81-
const TensorInfo<IndexType>& index, IndexType* indexOffset,
82-
const TensorInfo<IndexType>& t1, IndexType* t1Offset,
83-
const TensorInfo<IndexType>& t2, IndexType* t2Offset) {
84-
for (int d = index.dims - 1; d >= 0; d--) {
85-
IndexType curDimIndex = linearId % index.sizes[d];
86-
*indexOffset += curDimIndex * index.strides[d];
87-
*t1Offset += curDimIndex * t1.strides[d];
88-
if (d != dim) {
89-
*t2Offset += curDimIndex * t2.strides[d];
90-
}
91-
linearId /= index.sizes[d];
92-
}
93-
}
94-
95-
static __device__ void compute(
96-
IndexType linearId, const int dim,
97-
const TensorInfo<IndexType>& index, IndexType* indexOffset,
98-
const TensorInfo<IndexType>& t2, IndexType* t2Offset) {
99-
for (int d = index.dims - 1; d >= 0; d--) {
100-
IndexType curDimIndex = linearId % index.sizes[d];
101-
*indexOffset += curDimIndex * index.strides[d];
102-
if (d != dim) {
103-
*t2Offset += curDimIndex * t2.strides[d];
104-
}
105-
linearId /= index.sizes[d];
106-
}
107-
}
108-
};
109-
110-
111-
template <typename IndexType, int Dims>
112-
__global__ void diag_kernel(
113-
TensorInfo<IndexType> tensor,
114-
TensorInfo<IndexType> src,
115-
const IndexType totalElements) {
116-
for (IndexType linearId = blockIdx.x * blockDim.x + threadIdx.x; linearId < totalElements; linearId += gridDim.x * blockDim.x) {
117-
IndexType tensorOffset = 0;
118-
IndexType srcOffset = 0;
119-
IndexType indexOffset = 0;
120-
121-
IndexToScatterGatherOffsets<IndexType, Dims>::compute(linearId, dim,
122-
index, &indexOffset,
123-
tensor, &tensorOffset,
124-
src, &srcOffset);
125-
126-
IndexType indexValue = (IndexType)index.data[indexOffset];
127-
srcOffset += indexValue * src.strides[dim];
128-
129-
tensor.data[tensorOffset] = src.data[srcOffset];
130-
}
131-
};
132-
133-
#define DECLARE_DIAG(KERNEL_NAME, INDEX_TYPE, DIMS) \
134-
extern ""C"" {\
135-
__global__ void KERNEL_NAME(\
136-
TensorInfo<INDEX_TYPE> tensor,\
137-
TensorInfo<INDEX_TYPE> src,\
138-
INDEX_TYPE totalElements)\
139-
{\
140-
diag_kernel<INDEX_TYPE, DIMS>(tensor, src, totalElements);\
141-
}\
142-
}
143-
";
144-
14535
/// <summary>
14636
/// The diag matrix base name
14737
/// </summary>
@@ -161,8 +51,7 @@ public MatrixKernels() : base(GetCode(), "General", "ReduceApplyUtils")
16151
/// <returns>System.String.</returns>
16252
private static string GetCode()
16353
{
164-
Code = Resources.MatrixOps;
165-
var sb = new StringBuilder(Code);
54+
var sb = new StringBuilder(Resources.MatrixOps);
16655

16756
sb.AppendLine(GetMacroInvocations(true, 1));
16857
sb.AppendLine(GetMacroInvocations(true, 2));
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
using System;
2+
using System.Collections.Generic;
3+
using System.Text;
4+
using TensorSharp.CUDA;
5+
using TensorSharp.CUDA.DeviceCode;
6+
7+
namespace TensorSharp.Cuda.KernelOps
8+
{
9+
[OpsClass]
10+
public class MatrixOps
11+
{
12+
private readonly MatrixKernels matrixKernels = new MatrixKernels();
13+
14+
public MatrixOps()
15+
{
16+
17+
}
18+
19+
[RegisterOpStorageType("diag", typeof(CudaStorage))]
20+
public Tensor Diag(Tensor src) { return matrixKernels.Diag(src); }
21+
}
22+
}

Tensor/TensorSharp/TensorSharp.csproj

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -33,10 +33,6 @@
3333
<AllowUnsafeBlocks>true</AllowUnsafeBlocks>
3434
</PropertyGroup>
3535

36-
<ItemGroup>
37-
<Compile Remove="Cuda\DeviceCode\MatrixKernels.cs" />
38-
</ItemGroup>
39-
4036
<ItemGroup>
4137
<PackageReference Include="System.Drawing.Common" Version="4.5.1" />
4238
<PackageReference Include="System.Drawing.Primitives" Version="4.3.0" />
@@ -62,9 +58,6 @@
6258
</ItemGroup>
6359

6460
<ItemGroup>
65-
<None Update="Cuda\DeviceCode\CU\GatherSelect.cu">
66-
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
67-
</None>
6861
<None Update="Cuda\DeviceCode\CU\MatrixOps.c">
6962
<CopyToOutputDirectory>Never</CopyToOutputDirectory>
7063
</None>

0 commit comments

Comments
 (0)