- '''
-   code by Tae Hwan Jung(Jeff Jung) @graykode
- '''
+ # code by Tae Hwan Jung(Jeff Jung) @graykode
import numpy as np
import torch
import torch.nn as nn
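The hunk skips a few unchanged lines here; since `Variable(...)` is used throughout the code below, the full file presumably keeps an import along the lines of:

    from torch.autograd import Variable  # assumed: needed by make_batch and the hidden-state init below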
# S: Symbol that shows the start of decoding input
# E: Symbol that shows the end of decoding output
# P: Symbol that pads the sequence when the current batch data is shorter than the time steps
+ sentences = ['ich mochte ein bier P', 'S i want a beer', 'i want a beer E']
- char_arr = [c for c in 'SEPabcdefghijklmnopqrstuvwxyz ']
- num_dic = {n: i for i, n in enumerate(char_arr)}
- dic_len = len(num_dic)
+ word_list = " ".join(sentences).split()
+ word_list = list(set(word_list))
+ word_dict = {w: i for i, w in enumerate(word_list)}
+ n_class = len(word_dict)
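With the `sentences` above, the deduplicated vocabulary has exactly 11 tokens (8 distinct words plus 'S', 'E', 'P'), so every one-hot vector below has length 11; only the index assignment varies between runs, because `set()` is unordered. A quick sanity check:

    assert n_class == 11  # ich, mochte, ein, bier, i, want, a, beer, S, E, P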

# Parameter
- max_len = 20
+ max_len = 5  # n_step (= seq_len); the added 'S', 'E', or 'P' token makes each sequence 5 tokens long
n_hidden = 128
- total_epoch = 10000
- n_class = dic_len
+ batch_size = 1

- seq_data = [['Ich mochte ein bier', 'I want a BEER']]
+ def make_batch(sentences):
+     input_batch = [np.eye(n_class)[[word_dict[n] for n in sentences[0].split()]]]
+     output_batch = [np.eye(n_class)[[word_dict[n] for n in sentences[1].split()]]]
+     target_batch = [[word_dict[n] for n in sentences[2].split()]]
- def make_batch(seq_data):
-     input_batch = []
-     output_batch = []
-     target_batch = []
+     # make tensor
+     return Variable(torch.Tensor(input_batch)), Variable(torch.Tensor(output_batch)), Variable(torch.LongTensor(target_batch))
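For the single triple in `sentences`, the returned tensors have fixed shapes: the two one-hot batches are [batch_size=1, n_step=5, n_class=11] and the target is a [1, 5] index tensor. A minimal shape check, using only names defined in this file:

    ib, ob, tb = make_batch(sentences)
    print(ib.shape, ob.shape, tb.shape)  # torch.Size([1, 5, 11]) twice, then torch.Size([1, 5])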
+
+ class Attention(nn.Module):
+     def __init__(self):
+         super(Attention, self).__init__()
+         self.enc_cell = nn.RNN(input_size=n_class, hidden_size=n_hidden, dropout=0.5)
+         self.dec_cell = nn.RNN(input_size=n_class, hidden_size=n_hidden, dropout=0.5)
+
+         # Linear for attention
+         self.attn = nn.Linear(n_hidden, n_hidden)
+
+     def forward(self, enc_input, hidden, dec_input):
+         enc_input = enc_input.transpose(0, 1)  # enc_input: [max_len(=n_step, time step), batch_size, n_class]
+         dec_input = dec_input.transpose(0, 1)  # dec_input: [max_len(=n_step, time step), batch_size, n_class]
-     for seq in seq_data:
-         for i in range(2):
-             seq[i] = seq[i] + 'P' * (max_len - len(seq[i]))
+         # enc_outputs : [max_len, batch_size, num_directions(=1) * n_hidden]
+         # enc_states : [num_layers(=1) * num_directions(=1), batch_size, n_hidden]
+         enc_outputs, enc_states = self.enc_cell(enc_input, hidden)
+         dec_outputs, _ = self.dec_cell(dec_input, enc_states)
-         input = [num_dic[n] for n in seq[0]]
-         output = [num_dic[n] for n in ('S' + seq[1])]
-         target = [num_dic[n] for n in (seq[1] + 'E')]
+
+         return dec_outputs
+
+     def get_att_weight(self, hidden, enc_outputs):
+         attn_scores = Variable(torch.zeros(len(enc_outputs)))  # attn_scores : [n_step]
-         input_batch.append(np.eye(dic_len)[input])
-         output_batch.append(np.eye(dic_len)[output])
+
+     def get_att_score(self, hidden, encoder_hidden):
+         score = self.attn(encoder_hidden)
+         return torch.dot(hidden.view(-1), score.view(-1))
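`get_att_score` is a Luong-style "general" score: score(h_dec, h_enc) = h_dec · (W h_enc), with W being `self.attn`. The commit shows `get_att_weight` only up to allocating the score buffer; a plausible completion (my sketch, not code from this commit) would score every encoder step and softmax-normalize, roughly:

    import torch.nn.functional as F  # assumed import

    def get_att_weight(self, hidden, enc_outputs):
        n_step = len(enc_outputs)
        attn_scores = Variable(torch.zeros(n_step))  # attn_scores : [n_step]
        for i in range(n_step):
            # one dot-product score per encoder time step
            attn_scores[i] = self.get_att_score(hidden, enc_outputs[i])
        # normalize to weights that sum to 1, shaped for bmm: [1, 1, n_step]
        return F.softmax(attn_scores, dim=0).view(1, 1, -1)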
-         target_batch.append(target)
+ input_batch, output_batch, target_batch = make_batch(sentences)

-     return input_batch, output_batch, target_batch
+ # hidden : [num_layers(=1) * num_directions(=1), batch_size, n_hidden]
+ hidden = Variable(torch.zeros(1, 1, n_hidden))
+
+ model = Attention()
+ output = model(input_batch, hidden, output_batch)
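Note that `forward` as committed returns raw decoder states of shape [n_step, batch_size, n_hidden] and never calls `get_att_weight`, so `output` here is not yet class logits. Training against `target_batch` would additionally need a projection from n_hidden down to n_class; one hedged way to wire that up (the `out` layer and the loop below are my assumptions, not part of this commit):

    # assumed extra layer in Attention.__init__: self.out = nn.Linear(n_hidden, n_class)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    for epoch in range(2000):
        optimizer.zero_grad()
        dec_outputs = model(input_batch, hidden, output_batch)  # [n_step, 1, n_hidden]
        logits = model.out(dec_outputs).squeeze(1)  # [n_step, n_class]; uses the assumed self.out
        loss = criterion(logits, target_batch.squeeze(0))  # targets: [n_step]
        loss.backward()
        optimizer.step()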