From a532711e698a01d66e852434fd7161b339d4f564 Mon Sep 17 00:00:00 2001
From: mike2ox
Date: Wed, 22 Aug 2018 16:59:01 +0900
Subject: [PATCH] #28: Added the python code from the original post; translated up to the seq2seq concept
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 ..._sequence-to-sequence_learning_in_Keras.md | 239 ++++++++++++++++++
 1 file changed, 239 insertions(+)
 create mode 100644 28_A_ten-minute_introduction_to_sequence-to-sequence_learning_in_Keras.md

diff --git a/28_A_ten-minute_introduction_to_sequence-to-sequence_learning_in_Keras.md b/28_A_ten-minute_introduction_to_sequence-to-sequence_learning_in_Keras.md
new file mode 100644
index 0000000..f1e4af4
--- /dev/null
+++ b/28_A_ten-minute_introduction_to_sequence-to-sequence_learning_in_Keras.md
@@ -0,0 +1,239 @@
## A ten-minute introduction to sequence-to-sequence learning in Keras
Original post: [A ten-minute introduction to sequence-to-sequence learning in Keras](https://blog.keras.io/a-ten-minute-introduction-to-sequence-to-sequence-learning-in-keras.html)
> This document is a translated version of the tutorial that introduces Seq2Seq, a recurrent neural network (RNN) model, in ten minutes using Keras. It covers everything from what Seq2Seq means to implementing the model in Keras, and it assumes the reader already has some experience with recurrent networks and Keras.

* Keras
* RNN
* LSTM
* NLP
* Seq2Seq
* GRU layer

#### What is sequence-to-sequence learning?
Sequence-to-sequence (Seq2Seq) learning is about training models to convert sequences from one domain (e.g. sentences in English) to sequences in another domain (e.g. the same sentences translated to French).

```bash
 "the cat sat on the mat" -> [Seq2Seq model] -> "le chat etait assis sur le tapis"
```

This can be used for machine translation or for free-form question answering (generating a natural language answer to a natural language question) -- in general, it is applicable any time you need to generate text.

There are multiple ways to handle this task, for example using **RNN**s or **1D convnets**; this post uses RNNs.

#### The trivial case: when input and output sequences have the same length
When both the input and output sequences have the same length, you can implement such a model simply with a Keras Long Short-Term Memory (LSTM) or GRU layer (or a stack thereof).

![The trivial case](media/28_0.png)

#### The general case: canonical sequence-to-sequence
In the general case the input and output sequences have different lengths, and the entire input sequence is needed before prediction can start. An encoder RNN processes the input sequence and keeps only its final internal states; a decoder RNN, initialized with those states, is then trained to predict the next characters of the target sequence given the previous ones (a setup known as teacher forcing).

![seq2seq-teacher-forcing](media/28_1.png)

In inference mode, i.e. when we want to decode unknown input sequences, the process is slightly different.

![seq2seq-inference](media/28_2.png)

#### A Keras example
Here is the training model (a character-level setup; `num_encoder_tokens`, `num_decoder_tokens` and `latent_dim` come from the data preparation step sketched below):

```python
from keras.models import Model
from keras.layers import Input, LSTM, Dense

# Define an input sequence and process it.
encoder_inputs = Input(shape=(None, num_encoder_tokens))
encoder = LSTM(latent_dim, return_state=True)
encoder_outputs, state_h, state_c = encoder(encoder_inputs)
# We discard `encoder_outputs` and only keep the states.
encoder_states = [state_h, state_c]

# Set up the decoder, using `encoder_states` as initial state.
decoder_inputs = Input(shape=(None, num_decoder_tokens))
# We set up our decoder to return full output sequences,
# and to return internal states as well. We don't use the
# return states in the training model, but we will use them in inference.
decoder_lstm = LSTM(latent_dim, return_sequences=True, return_state=True)
decoder_outputs, _, _ = decoder_lstm(decoder_inputs,
                                     initial_state=encoder_states)
decoder_dense = Dense(num_decoder_tokens, activation='softmax')
decoder_outputs = decoder_dense(decoder_outputs)

# Define the model that will turn
# `encoder_input_data` & `decoder_input_data` into `decoder_target_data`
model = Model([encoder_inputs, decoder_inputs], decoder_outputs)
```
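
The post does not show how `encoder_input_data`, `decoder_input_data` and `decoder_target_data` are built. Below is a minimal preprocessing sketch, written as an assumption rather than taken from the post: `input_texts` and `target_texts` are hypothetical parallel lists of source and target strings, every target string is assumed to start with `'\t'` and end with `'\n'`, and the hyper-parameter values are placeholders. In a full script this would of course run before the model definition above.

```python
import numpy as np

# All names and values below are assumptions, not taken from the original post.
latent_dim = 256   # dimensionality of the LSTM state
batch_size = 64
epochs = 100
num_samples = len(input_texts)  # `input_texts` / `target_texts`: parallel lists of strings

# Build character vocabularies for the input and target sides.
input_characters = sorted(set(''.join(input_texts)))
target_characters = sorted(set(''.join(target_texts)))
num_encoder_tokens = len(input_characters)
num_decoder_tokens = len(target_characters)
max_encoder_seq_length = max(len(txt) for txt in input_texts)
max_decoder_seq_length = max(len(txt) for txt in target_texts)

input_token_index = {char: i for i, char in enumerate(input_characters)}
target_token_index = {char: i for i, char in enumerate(target_characters)}
reverse_target_char_index = {i: char for char, i in target_token_index.items()}

# One-hot encode the data.
encoder_input_data = np.zeros(
    (num_samples, max_encoder_seq_length, num_encoder_tokens), dtype='float32')
decoder_input_data = np.zeros(
    (num_samples, max_decoder_seq_length, num_decoder_tokens), dtype='float32')
decoder_target_data = np.zeros(
    (num_samples, max_decoder_seq_length, num_decoder_tokens), dtype='float32')

for i, (input_text, target_text) in enumerate(zip(input_texts, target_texts)):
    for t, char in enumerate(input_text):
        encoder_input_data[i, t, input_token_index[char]] = 1.
    for t, char in enumerate(target_text):
        decoder_input_data[i, t, target_token_index[char]] = 1.
        if t > 0:
            # `decoder_target_data` is ahead of `decoder_input_data` by one
            # timestep and does not include the start character.
            decoder_target_data[i, t - 1, target_token_index[char]] = 1.
```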

We train the model in two lines, while monitoring the loss on a held-out set of 20% of the samples:

```python
# Run training
model.compile(optimizer='rmsprop', loss='categorical_crossentropy')
model.fit([encoder_input_data, decoder_input_data], decoder_target_data,
          batch_size=batch_size,
          epochs=epochs,
          validation_split=0.2)
```

For inference we need slightly different models. Decoding an unknown input sequence works like this: 1) encode the input sequence into state vectors, 2) start with a target sequence of size 1 containing only the start-of-sequence character, 3) feed the state vectors and the 1-char target sequence to the decoder to get a prediction for the next character, 4) sample the next character (here with argmax) and append it to the target sequence, 5) repeat until the end-of-sequence character is generated or the character limit is hit.

```python
encoder_model = Model(encoder_inputs, encoder_states)

decoder_state_input_h = Input(shape=(latent_dim,))
decoder_state_input_c = Input(shape=(latent_dim,))
decoder_states_inputs = [decoder_state_input_h, decoder_state_input_c]
decoder_outputs, state_h, state_c = decoder_lstm(
    decoder_inputs, initial_state=decoder_states_inputs)
decoder_states = [state_h, state_c]
decoder_outputs = decoder_dense(decoder_outputs)
decoder_model = Model(
    [decoder_inputs] + decoder_states_inputs,
    [decoder_outputs] + decoder_states)
```

We use these two models to implement the inference loop described above:

```python
def decode_sequence(input_seq):
    # Encode the input as state vectors.
    states_value = encoder_model.predict(input_seq)

    # Generate empty target sequence of length 1.
    target_seq = np.zeros((1, 1, num_decoder_tokens))
    # Populate the first character of target sequence with the start character.
    target_seq[0, 0, target_token_index['\t']] = 1.

    # Sampling loop for a batch of sequences
    # (to simplify, here we assume a batch of size 1).
    stop_condition = False
    decoded_sentence = ''
    while not stop_condition:
        output_tokens, h, c = decoder_model.predict(
            [target_seq] + states_value)

        # Sample a token
        sampled_token_index = np.argmax(output_tokens[0, -1, :])
        sampled_char = reverse_target_char_index[sampled_token_index]
        decoded_sentence += sampled_char

        # Exit condition: either hit max length
        # or find stop character.
        if (sampled_char == '\n' or
                len(decoded_sentence) > max_decoder_seq_length):
            stop_condition = True

        # Update the target sequence (of length 1).
        target_seq = np.zeros((1, 1, num_decoder_tokens))
        target_seq[0, 0, sampled_token_index] = 1.

        # Update states
        states_value = [h, c]

    return decoded_sentence
```

Decoding a few samples from the training data gives results like these:

```bash
Input sentence: Be nice.
Decoded sentence: Soyez gentil !
-
Input sentence: Drop it!
Decoded sentence: Laissez tomber !
-
Input sentence: Get out!
Decoded sentence: Sortez !
```

### References
* [Sequence to Sequence Learning with Neural Networks](https://arxiv.org/abs/1409.3215)
* [Learning Phrase Representations using RNN Encoder-Decoder for Statistical Machine Translation](https://arxiv.org/abs/1406.1078)

----

### Bonus FAQ

#### What if I want to use a GRU layer instead of an LSTM?
It is actually a bit simpler, because a GRU has only one state, whereas an LSTM has two:

```python
from keras.layers import GRU

encoder_inputs = Input(shape=(None, num_encoder_tokens))
encoder = GRU(latent_dim, return_state=True)
encoder_outputs, state_h = encoder(encoder_inputs)

decoder_inputs = Input(shape=(None, num_decoder_tokens))
decoder_gru = GRU(latent_dim, return_sequences=True)
decoder_outputs = decoder_gru(decoder_inputs, initial_state=state_h)
decoder_dense = Dense(num_decoder_tokens, activation='softmax')
decoder_outputs = decoder_dense(decoder_outputs)
model = Model([encoder_inputs, decoder_inputs], decoder_outputs)
```
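
The post stops at the training model for the GRU variant. As a rough sketch (an assumption, not part of the original post), matching inference models could be built the same way as in the LSTM case, provided `decoder_gru` is created with `return_state=True` so that its updated state can be read back:

```python
# Sketch only: this assumes the decoder GRU above was defined instead as
#   decoder_gru = GRU(latent_dim, return_sequences=True, return_state=True)
#   decoder_outputs, _ = decoder_gru(decoder_inputs, initial_state=state_h)

# The encoder model maps an input sequence to its single GRU state.
encoder_model = Model(encoder_inputs, state_h)

# The decoder model maps a 1-char target sequence plus the previous state
# to next-character probabilities and the updated state.
decoder_state_input_h = Input(shape=(latent_dim,))
decoder_outputs, decoder_state_h = decoder_gru(
    decoder_inputs, initial_state=decoder_state_input_h)
decoder_outputs = decoder_dense(decoder_outputs)
decoder_model = Model(
    [decoder_inputs, decoder_state_input_h],
    [decoder_outputs, decoder_state_h])

# The sampling loop stays the same as in `decode_sequence`, except that
# `states_value` is a single state array rather than an [h, c] pair.
```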

#### What if I want to use a word-level model with integer sequences?
If the inputs are sequences of integers (e.g. words encoded by their index in a dictionary), you can embed the integer tokens with an `Embedding` layer:

```python
from keras.layers import Embedding

# Define an input sequence and process it.
encoder_inputs = Input(shape=(None,))
x = Embedding(num_encoder_tokens, latent_dim)(encoder_inputs)
x, state_h, state_c = LSTM(latent_dim,
                           return_state=True)(x)
encoder_states = [state_h, state_c]

# Set up the decoder, using `encoder_states` as initial state.
decoder_inputs = Input(shape=(None,))
x = Embedding(num_decoder_tokens, latent_dim)(decoder_inputs)
x = LSTM(latent_dim, return_sequences=True)(x, initial_state=encoder_states)
decoder_outputs = Dense(num_decoder_tokens, activation='softmax')(x)

# Define the model that will turn
# `encoder_input_data` & `decoder_input_data` into `decoder_target_data`
model = Model([encoder_inputs, decoder_inputs], decoder_outputs)

# Compile & run training
model.compile(optimizer='rmsprop', loss='categorical_crossentropy')
# Note that `decoder_target_data` needs to be one-hot encoded,
# rather than sequences of integers like `decoder_input_data`!
model.fit([encoder_input_data, decoder_input_data], decoder_target_data,
          batch_size=batch_size,
          epochs=epochs,
          validation_split=0.2)
```

#### What if I don't want to use teacher forcing during training?
In that case you can train by reinjecting the decoder's own predictions back into the decoder, the same way it is done at inference time:

```python
from keras.layers import Lambda
from keras import backend as K

# The first part is unchanged
encoder_inputs = Input(shape=(None, num_encoder_tokens))
encoder = LSTM(latent_dim, return_state=True)
encoder_outputs, state_h, state_c = encoder(encoder_inputs)
states = [state_h, state_c]

# Set up the decoder, which will only process one timestep at a time.
decoder_inputs = Input(shape=(1, num_decoder_tokens))
decoder_lstm = LSTM(latent_dim, return_sequences=True, return_state=True)
decoder_dense = Dense(num_decoder_tokens, activation='softmax')

all_outputs = []
inputs = decoder_inputs
for _ in range(max_decoder_seq_length):
    # Run the decoder on one timestep
    outputs, state_h, state_c = decoder_lstm(inputs,
                                             initial_state=states)
    outputs = decoder_dense(outputs)
    # Store the current prediction (we will concatenate all predictions later)
    all_outputs.append(outputs)
    # Reinject the outputs as inputs for the next loop iteration
    # as well as update the states
    inputs = outputs
    states = [state_h, state_c]

# Concatenate all predictions
decoder_outputs = Lambda(lambda x: K.concatenate(x, axis=1))(all_outputs)

# Define and compile model as previously
model = Model([encoder_inputs, decoder_inputs], decoder_outputs)
model.compile(optimizer='rmsprop', loss='categorical_crossentropy')

# Prepare decoder input data that just contains the start character
# Note that we could have made it a constant hard-coded in the model
decoder_input_data = np.zeros((num_samples, 1, num_decoder_tokens))
decoder_input_data[:, 0, target_token_index['\t']] = 1.

# Train model as previously
model.fit([encoder_input_data, decoder_input_data], decoder_target_data,
          batch_size=batch_size,
          epochs=epochs,
          validation_split=0.2)
```
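
Because this last model already feeds the decoder's predictions back in inside the graph, it can also be used for decoding directly, without separate inference models. Below is a rough usage sketch, not part of the original post: `input_seq` is assumed to be a single one-hot encoded input sequence of shape `(1, timesteps, num_encoder_tokens)`, and note that the graph reinjects the softmax distributions themselves rather than sampled one-hot tokens.

```python
# Sketch only: greedy decoding with the teacher-forcing-free model.
start_token = np.zeros((1, 1, num_decoder_tokens))
start_token[0, 0, target_token_index['\t']] = 1.

# Output shape: (1, max_decoder_seq_length, num_decoder_tokens)
output_probs = model.predict([input_seq, start_token])
sampled_indices = np.argmax(output_probs[0], axis=-1)
decoded_sentence = ''.join(reverse_target_char_index[i] for i in sampled_indices)
# Keep only the text up to the first end-of-sequence character.
print(decoded_sentence.split('\n')[0])
```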