From 3df9cb040885136016953355331a230d656dd5e5 Mon Sep 17 00:00:00 2001 From: Jack Lee Date: Wed, 29 Jun 2022 16:50:30 +0900 Subject: [PATCH 1/7] fix typos --- src/transformers/models/layoutlmv3/modeling_layoutlmv3.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/transformers/models/layoutlmv3/modeling_layoutlmv3.py b/src/transformers/models/layoutlmv3/modeling_layoutlmv3.py index f3bdd2cd8d9067..8d580cfd472418 100644 --- a/src/transformers/models/layoutlmv3/modeling_layoutlmv3.py +++ b/src/transformers/models/layoutlmv3/modeling_layoutlmv3.py @@ -64,7 +64,7 @@ input_ids (`torch.LongTensor` of shape `{0}`): Indices of input sequence tokens in the vocabulary. - Indices can be obtained using [`LayoutLMv2Tokenizer`]. See [`PreTrainedTokenizer.encode`] and + Indices can be obtained using [`LayoutLMv3Tokenizer`]. See [`PreTrainedTokenizer.encode`] and [`PreTrainedTokenizer.__call__`] for details. [What are input IDs?](../glossary#input-ids) @@ -975,7 +975,7 @@ def __init__(self, config): self.init_weights() - @add_start_docstrings_to_model_forward(LAYOUTLMV3_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @add_start_docstrings_to_model_forward(LAYOUTLMV3_INPUTS_DOCSTRING.format("(batch_size, sequence_length)")) @replace_return_docstrings(output_type=TokenClassifierOutput, config_class=_CONFIG_FOR_DOC) def forward( self, @@ -1084,7 +1084,7 @@ def __init__(self, config): self.init_weights() - @add_start_docstrings_to_model_forward(LAYOUTLMV3_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @add_start_docstrings_to_model_forward(LAYOUTLMV3_INPUTS_DOCSTRING.format("(batch_size, sequence_length)")) @replace_return_docstrings(output_type=QuestionAnsweringModelOutput, config_class=_CONFIG_FOR_DOC) def forward( self, @@ -1214,7 +1214,7 @@ def __init__(self, config): self.init_weights() - @add_start_docstrings_to_model_forward(LAYOUTLMV3_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @add_start_docstrings_to_model_forward(LAYOUTLMV3_INPUTS_DOCSTRING.format("(batch_size, sequence_length)")) @replace_return_docstrings(output_type=SequenceClassifierOutput, config_class=_CONFIG_FOR_DOC) def forward( self, From c6778c82fb62dc13a0040e9f08c38fb4e912cd96 Mon Sep 17 00:00:00 2001 From: Jack Lee Date: Wed, 29 Jun 2022 17:59:37 +0900 Subject: [PATCH 2/7] fix sequence_length docs of LayoutLMv3Model --- .../models/layoutlmv3/modeling_layoutlmv3.py | 39 ++++++++++++------- 1 file changed, 24 insertions(+), 15 deletions(-) diff --git a/src/transformers/models/layoutlmv3/modeling_layoutlmv3.py b/src/transformers/models/layoutlmv3/modeling_layoutlmv3.py index 8d580cfd472418..dd0938289df3c3 100644 --- a/src/transformers/models/layoutlmv3/modeling_layoutlmv3.py +++ b/src/transformers/models/layoutlmv3/modeling_layoutlmv3.py @@ -61,9 +61,9 @@ LAYOUTLMV3_INPUTS_DOCSTRING = r""" Args: - input_ids (`torch.LongTensor` of shape `{0}`): + input_ids (`torch.LongTensor` of shape `({0})`): Indices of input sequence tokens in the vocabulary. - + {1} Indices can be obtained using [`LayoutLMv3Tokenizer`]. See [`PreTrainedTokenizer.encode`] and [`PreTrainedTokenizer.__call__`] for details. @@ -74,29 +74,31 @@ config.max_2d_position_embeddings-1]`. Each bounding box should be a normalized version in (x0, y0, x1, y1) format, where (x0, y0) corresponds to the position of the upper left corner in the bounding box, and (x1, y1) represents the position of the lower right corner. 
- + {1} pixel_values (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)`): - Batch of document images. + Batch of document images. Each Image is divided into patches of shape `(num_channels, config.patch_size, + config.patch_size)` and the total number of patches (=`patch_sequence_length`) equals to + `((height / config.patch_size) * (width / config.patch_size))`. - attention_mask (`torch.FloatTensor` of shape `{0}`, *optional*): + attention_mask (`torch.FloatTensor` of shape `({0})`, *optional*): Mask to avoid performing attention on padding token indices. Mask values selected in `[0, 1]`: - 1 for tokens that are **not masked**, - 0 for tokens that are **masked**. - + {1} [What are attention masks?](../glossary#attention-mask) - token_type_ids (`torch.LongTensor` of shape `{0}`, *optional*): + token_type_ids (`torch.LongTensor` of shape `({0})`, *optional*): Segment token indices to indicate first and second portions of the inputs. Indices are selected in `[0, 1]`: - 0 corresponds to a *sentence A* token, - 1 corresponds to a *sentence B* token. - + {1} [What are token type IDs?](../glossary#token-type-ids) - position_ids (`torch.LongTensor` of shape `{0}`, *optional*): + position_ids (`torch.LongTensor` of shape `({0})`, *optional*): Indices of positions of each input sequence tokens in the position embeddings. Selected in the range `[0, config.max_position_embeddings - 1]`. - + {1} [What are position IDs?](../glossary#position-ids) head_mask (`torch.FloatTensor` of shape `(num_heads,)` or `(num_layers, num_heads)`, *optional*): Mask to nullify selected heads of the self-attention modules. Mask values selected in `[0, 1]`: @@ -104,7 +106,7 @@ - 1 indicates the head is **not masked**, - 0 indicates the head is **masked**. - inputs_embeds (`torch.FloatTensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*): + inputs_embeds (`torch.FloatTensor` of shape `({0}, hidden_size)`, *optional*): Optionally, instead of passing `input_ids` you can choose to directly pass an embedded representation. This is useful if you want more control over how to convert *input_ids* indices into associated vectors than the model's internal embedding lookup matrix. @@ -118,6 +120,11 @@ Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. """ +LAYOUTLMV3MODEL_INPUTS_DOCSTRING = r""" + Note that `sequence_length = token_sequence_length + patch_sequence_length + 1` where `1` is for + [CLS] token. See `pixel_values` for `patch_sequence_length`. +""" + class LayoutLMv3PatchEmbeddings(nn.Module): """LayoutLMv3 image (patch) embeddings. 
This class also automatically interpolates the position embeddings for varying @@ -763,7 +770,9 @@ def forward_image(self, pixel_values): return embeddings - @add_start_docstrings_to_model_forward(LAYOUTLMV3_INPUTS_DOCSTRING.format("(batch_size, sequence_length)")) + @add_start_docstrings_to_model_forward( + LAYOUTLMV3_INPUTS_DOCSTRING.format("batch_size, token_sequence_length", LAYOUTLMV3MODEL_INPUTS_DOCSTRING) + ) @replace_return_docstrings(output_type=BaseModelOutput, config_class=_CONFIG_FOR_DOC) def forward( self, @@ -975,7 +984,7 @@ def __init__(self, config): self.init_weights() - @add_start_docstrings_to_model_forward(LAYOUTLMV3_INPUTS_DOCSTRING.format("(batch_size, sequence_length)")) + @add_start_docstrings_to_model_forward(LAYOUTLMV3_INPUTS_DOCSTRING.format("batch_size, sequence_length", "")) @replace_return_docstrings(output_type=TokenClassifierOutput, config_class=_CONFIG_FOR_DOC) def forward( self, @@ -1084,7 +1093,7 @@ def __init__(self, config): self.init_weights() - @add_start_docstrings_to_model_forward(LAYOUTLMV3_INPUTS_DOCSTRING.format("(batch_size, sequence_length)")) + @add_start_docstrings_to_model_forward(LAYOUTLMV3_INPUTS_DOCSTRING.format("batch_size, sequence_length", "")) @replace_return_docstrings(output_type=QuestionAnsweringModelOutput, config_class=_CONFIG_FOR_DOC) def forward( self, @@ -1214,7 +1223,7 @@ def __init__(self, config): self.init_weights() - @add_start_docstrings_to_model_forward(LAYOUTLMV3_INPUTS_DOCSTRING.format("(batch_size, sequence_length)")) + @add_start_docstrings_to_model_forward(LAYOUTLMV3_INPUTS_DOCSTRING.format("batch_size, sequence_length", "")) @replace_return_docstrings(output_type=SequenceClassifierOutput, config_class=_CONFIG_FOR_DOC) def forward( self, From 5ce35361c4c4d9b559cccb6223bb6966cab4666b Mon Sep 17 00:00:00 2001 From: Jack Lee Date: Wed, 29 Jun 2022 18:28:13 +0900 Subject: [PATCH 3/7] delete trailing white spaces --- src/transformers/models/layoutlmv3/modeling_layoutlmv3.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/transformers/models/layoutlmv3/modeling_layoutlmv3.py b/src/transformers/models/layoutlmv3/modeling_layoutlmv3.py index dd0938289df3c3..71bdecb83b9fb2 100644 --- a/src/transformers/models/layoutlmv3/modeling_layoutlmv3.py +++ b/src/transformers/models/layoutlmv3/modeling_layoutlmv3.py @@ -77,7 +77,7 @@ {1} pixel_values (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)`): Batch of document images. Each Image is divided into patches of shape `(num_channels, config.patch_size, - config.patch_size)` and the total number of patches (=`patch_sequence_length`) equals to + config.patch_size)` and the total number of patches (=`patch_sequence_length`) equals to `((height / config.patch_size) * (width / config.patch_size))`. attention_mask (`torch.FloatTensor` of shape `({0})`, *optional*): @@ -121,7 +121,7 @@ """ LAYOUTLMV3MODEL_INPUTS_DOCSTRING = r""" - Note that `sequence_length = token_sequence_length + patch_sequence_length + 1` where `1` is for + Note that `sequence_length = token_sequence_length + patch_sequence_length + 1` where `1` is for [CLS] token. See `pixel_values` for `patch_sequence_length`. 
""" From 3a2ab3ab1c6b76a2bb890a3c63326eb61c177a6a Mon Sep 17 00:00:00 2001 From: Jack Lee Date: Tue, 5 Jul 2022 13:36:49 +0900 Subject: [PATCH 4/7] fix layoutlmv3 docs more --- src/transformers/models/layoutlmv3/modeling_layoutlmv3.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/transformers/models/layoutlmv3/modeling_layoutlmv3.py b/src/transformers/models/layoutlmv3/modeling_layoutlmv3.py index 71bdecb83b9fb2..7347251330e2db 100644 --- a/src/transformers/models/layoutlmv3/modeling_layoutlmv3.py +++ b/src/transformers/models/layoutlmv3/modeling_layoutlmv3.py @@ -54,7 +54,7 @@ behavior. Parameters: - config ([`LayoutLMv2Config`]): Model configuration class with all the parameters of the model. + config ([`LayoutLMv3Config`]): Model configuration class with all the parameters of the model. Initializing with a config file does not load the weights associated with the model, only the configuration. Check out the [`~PreTrainedModel.from_pretrained`] method to load the model weights. """ @@ -76,7 +76,7 @@ y1) represents the position of the lower right corner. {1} pixel_values (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)`): - Batch of document images. Each Image is divided into patches of shape `(num_channels, config.patch_size, + Batch of document images. Each image is divided into patches of shape `(num_channels, config.patch_size, config.patch_size)` and the total number of patches (=`patch_sequence_length`) equals to `((height / config.patch_size) * (width / config.patch_size))`. @@ -120,7 +120,7 @@ Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. """ -LAYOUTLMV3MODEL_INPUTS_DOCSTRING = r""" +LAYOUTLMV3_INPUT_CLARIFICATION = r""" Note that `sequence_length = token_sequence_length + patch_sequence_length + 1` where `1` is for [CLS] token. See `pixel_values` for `patch_sequence_length`. """ @@ -771,7 +771,7 @@ def forward_image(self, pixel_values): return embeddings @add_start_docstrings_to_model_forward( - LAYOUTLMV3_INPUTS_DOCSTRING.format("batch_size, token_sequence_length", LAYOUTLMV3MODEL_INPUTS_DOCSTRING) + LAYOUTLMV3_INPUTS_DOCSTRING.format("batch_size, token_sequence_length", LAYOUTLMV3_INPUT_CLARIFICATION) ) @replace_return_docstrings(output_type=BaseModelOutput, config_class=_CONFIG_FOR_DOC) def forward( From 74b9886c0666858da8d52296087aace6934e1149 Mon Sep 17 00:00:00 2001 From: Wonseok Lee Date: Wed, 6 Jul 2022 02:06:40 +0000 Subject: [PATCH 5/7] apply make fixup & quality --- .../models/layoutlmv3/modeling_layoutlmv3.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/src/transformers/models/layoutlmv3/modeling_layoutlmv3.py b/src/transformers/models/layoutlmv3/modeling_layoutlmv3.py index 7347251330e2db..44990e3487b1c6 100644 --- a/src/transformers/models/layoutlmv3/modeling_layoutlmv3.py +++ b/src/transformers/models/layoutlmv3/modeling_layoutlmv3.py @@ -63,7 +63,9 @@ Args: input_ids (`torch.LongTensor` of shape `({0})`): Indices of input sequence tokens in the vocabulary. + {1} + Indices can be obtained using [`LayoutLMv3Tokenizer`]. See [`PreTrainedTokenizer.encode`] and [`PreTrainedTokenizer.__call__`] for details. @@ -74,18 +76,22 @@ config.max_2d_position_embeddings-1]`. Each bounding box should be a normalized version in (x0, y0, x1, y1) format, where (x0, y0) corresponds to the position of the upper left corner in the bounding box, and (x1, y1) represents the position of the lower right corner. 
+ {1} + pixel_values (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)`): Batch of document images. Each image is divided into patches of shape `(num_channels, config.patch_size, - config.patch_size)` and the total number of patches (=`patch_sequence_length`) equals to - `((height / config.patch_size) * (width / config.patch_size))`. + config.patch_size)` and the total number of patches (=`patch_sequence_length`) equals to `((height / + config.patch_size) * (width / config.patch_size))`. attention_mask (`torch.FloatTensor` of shape `({0})`, *optional*): Mask to avoid performing attention on padding token indices. Mask values selected in `[0, 1]`: - 1 for tokens that are **not masked**, - 0 for tokens that are **masked**. + {1} + [What are attention masks?](../glossary#attention-mask) token_type_ids (`torch.LongTensor` of shape `({0})`, *optional*): Segment token indices to indicate first and second portions of the inputs. Indices are selected in `[0, @@ -93,12 +99,16 @@ - 0 corresponds to a *sentence A* token, - 1 corresponds to a *sentence B* token. + {1} + [What are token type IDs?](../glossary#token-type-ids) position_ids (`torch.LongTensor` of shape `({0})`, *optional*): Indices of positions of each input sequence tokens in the position embeddings. Selected in the range `[0, config.max_position_embeddings - 1]`. + {1} + [What are position IDs?](../glossary#position-ids) head_mask (`torch.FloatTensor` of shape `(num_heads,)` or `(num_layers, num_heads)`, *optional*): Mask to nullify selected heads of the self-attention modules. Mask values selected in `[0, 1]`: @@ -121,8 +131,8 @@ """ LAYOUTLMV3_INPUT_CLARIFICATION = r""" - Note that `sequence_length = token_sequence_length + patch_sequence_length + 1` where `1` is for - [CLS] token. See `pixel_values` for `patch_sequence_length`. + Note that `sequence_length = token_sequence_length + patch_sequence_length + 1` where `1` is for [CLS] + token. See `pixel_values` for `patch_sequence_length`. """ From d605858cb6d2269291071fbb5f308a3bc3aedf67 Mon Sep 17 00:00:00 2001 From: Jack Lee Date: Thu, 11 Aug 2022 15:37:50 +0900 Subject: [PATCH 6/7] change to two versions of input docstring --- .../models/layoutlmv3/modeling_layoutlmv3.py | 87 ++++++++++++++++--- 1 file changed, 74 insertions(+), 13 deletions(-) diff --git a/src/transformers/models/layoutlmv3/modeling_layoutlmv3.py b/src/transformers/models/layoutlmv3/modeling_layoutlmv3.py index 44990e3487b1c6..979c0fbcbbae9b 100644 --- a/src/transformers/models/layoutlmv3/modeling_layoutlmv3.py +++ b/src/transformers/models/layoutlmv3/modeling_layoutlmv3.py @@ -59,12 +59,13 @@ configuration. Check out the [`~PreTrainedModel.from_pretrained`] method to load the model weights. """ -LAYOUTLMV3_INPUTS_DOCSTRING = r""" +LAYOUTLMV3_MODEL_INPUTS_DOCSTRING = r""" Args: input_ids (`torch.LongTensor` of shape `({0})`): Indices of input sequence tokens in the vocabulary. - {1} + Note that `sequence_length = token_sequence_length + patch_sequence_length + 1` where `1` is for [CLS] + token. See `pixel_values` for `patch_sequence_length`. Indices can be obtained using [`LayoutLMv3Tokenizer`]. See [`PreTrainedTokenizer.encode`] and [`PreTrainedTokenizer.__call__`] for details. @@ -77,7 +78,8 @@ format, where (x0, y0) corresponds to the position of the upper left corner in the bounding box, and (x1, y1) represents the position of the lower right corner. - {1} + Note that `sequence_length = token_sequence_length + patch_sequence_length + 1` where `1` is for [CLS] + token. 
See `pixel_values` for `patch_sequence_length`. pixel_values (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)`): Batch of document images. Each image is divided into patches of shape `(num_channels, config.patch_size, @@ -90,7 +92,8 @@ - 1 for tokens that are **not masked**, - 0 for tokens that are **masked**. - {1} + Note that `sequence_length = token_sequence_length + patch_sequence_length + 1` where `1` is for [CLS] + token. See `pixel_values` for `patch_sequence_length`. [What are attention masks?](../glossary#attention-mask) token_type_ids (`torch.LongTensor` of shape `({0})`, *optional*): @@ -100,14 +103,16 @@ - 0 corresponds to a *sentence A* token, - 1 corresponds to a *sentence B* token. - {1} + Note that `sequence_length = token_sequence_length + patch_sequence_length + 1` where `1` is for [CLS] + token. See `pixel_values` for `patch_sequence_length`. [What are token type IDs?](../glossary#token-type-ids) position_ids (`torch.LongTensor` of shape `({0})`, *optional*): Indices of positions of each input sequence tokens in the position embeddings. Selected in the range `[0, config.max_position_embeddings - 1]`. - {1} + Note that `sequence_length = token_sequence_length + patch_sequence_length + 1` where `1` is for [CLS] + token. See `pixel_values` for `patch_sequence_length`. [What are position IDs?](../glossary#position-ids) head_mask (`torch.FloatTensor` of shape `(num_heads,)` or `(num_layers, num_heads)`, *optional*): @@ -130,9 +135,65 @@ Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. """ -LAYOUTLMV3_INPUT_CLARIFICATION = r""" - Note that `sequence_length = token_sequence_length + patch_sequence_length + 1` where `1` is for [CLS] - token. See `pixel_values` for `patch_sequence_length`. +LAYOUTLMV3_DOWNSTREAM_INPUTS_DOCSTRING = r""" + Args: + input_ids (`torch.LongTensor` of shape `({0})`): + Indices of input sequence tokens in the vocabulary. + + Indices can be obtained using [`LayoutLMv3Tokenizer`]. See [`PreTrainedTokenizer.encode`] and + [`PreTrainedTokenizer.__call__`] for details. + + [What are input IDs?](../glossary#input-ids) + + bbox (`torch.LongTensor` of shape `({0}, 4)`, *optional*): + Bounding boxes of each input sequence tokens. Selected in the range `[0, + config.max_2d_position_embeddings-1]`. Each bounding box should be a normalized version in (x0, y0, x1, y1) + format, where (x0, y0) corresponds to the position of the upper left corner in the bounding box, and (x1, + y1) represents the position of the lower right corner. + + pixel_values (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)`): + Batch of document images. Each image is divided into patches of shape `(num_channels, config.patch_size, + config.patch_size)` and the total number of patches (=`patch_sequence_length`) equals to `((height / + config.patch_size) * (width / config.patch_size))`. + + attention_mask (`torch.FloatTensor` of shape `({0})`, *optional*): + Mask to avoid performing attention on padding token indices. Mask values selected in `[0, 1]`: + + - 1 for tokens that are **not masked**, + - 0 for tokens that are **masked**. + + [What are attention masks?](../glossary#attention-mask) + token_type_ids (`torch.LongTensor` of shape `({0})`, *optional*): + Segment token indices to indicate first and second portions of the inputs. Indices are selected in `[0, + 1]`: + + - 0 corresponds to a *sentence A* token, + - 1 corresponds to a *sentence B* token. 
+ + [What are token type IDs?](../glossary#token-type-ids) + position_ids (`torch.LongTensor` of shape `({0})`, *optional*): + Indices of positions of each input sequence tokens in the position embeddings. Selected in the range `[0, + config.max_position_embeddings - 1]`. + + [What are position IDs?](../glossary#position-ids) + head_mask (`torch.FloatTensor` of shape `(num_heads,)` or `(num_layers, num_heads)`, *optional*): + Mask to nullify selected heads of the self-attention modules. Mask values selected in `[0, 1]`: + + - 1 indicates the head is **not masked**, + - 0 indicates the head is **masked**. + + inputs_embeds (`torch.FloatTensor` of shape `({0}, hidden_size)`, *optional*): + Optionally, instead of passing `input_ids` you can choose to directly pass an embedded representation. This + is useful if you want more control over how to convert *input_ids* indices into associated vectors than the + model's internal embedding lookup matrix. + output_attentions (`bool`, *optional*): + Whether or not to return the attentions tensors of all attention layers. See `attentions` under returned + tensors for more detail. + output_hidden_states (`bool`, *optional*): + Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for + more detail. + return_dict (`bool`, *optional*): + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. """ @@ -781,7 +842,7 @@ def forward_image(self, pixel_values): return embeddings @add_start_docstrings_to_model_forward( - LAYOUTLMV3_INPUTS_DOCSTRING.format("batch_size, token_sequence_length", LAYOUTLMV3_INPUT_CLARIFICATION) + LAYOUTLMV3_MODEL_INPUTS_DOCSTRING.format("batch_size, token_sequence_length") ) @replace_return_docstrings(output_type=BaseModelOutput, config_class=_CONFIG_FOR_DOC) def forward( @@ -994,7 +1055,7 @@ def __init__(self, config): self.init_weights() - @add_start_docstrings_to_model_forward(LAYOUTLMV3_INPUTS_DOCSTRING.format("batch_size, sequence_length", "")) + @add_start_docstrings_to_model_forward(LAYOUTLMV3_DOWNSTREAM_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @replace_return_docstrings(output_type=TokenClassifierOutput, config_class=_CONFIG_FOR_DOC) def forward( self, @@ -1103,7 +1164,7 @@ def __init__(self, config): self.init_weights() - @add_start_docstrings_to_model_forward(LAYOUTLMV3_INPUTS_DOCSTRING.format("batch_size, sequence_length", "")) + @add_start_docstrings_to_model_forward(LAYOUTLMV3_DOWNSTREAM_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @replace_return_docstrings(output_type=QuestionAnsweringModelOutput, config_class=_CONFIG_FOR_DOC) def forward( self, @@ -1233,7 +1294,7 @@ def __init__(self, config): self.init_weights() - @add_start_docstrings_to_model_forward(LAYOUTLMV3_INPUTS_DOCSTRING.format("batch_size, sequence_length", "")) + @add_start_docstrings_to_model_forward(LAYOUTLMV3_DOWNSTREAM_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @replace_return_docstrings(output_type=SequenceClassifierOutput, config_class=_CONFIG_FOR_DOC) def forward( self, From 0c90b28da6e077a752f2e69021f73ca54d5e5b7f Mon Sep 17 00:00:00 2001 From: Wonseok Lee Date: Thu, 11 Aug 2022 07:46:20 +0000 Subject: [PATCH 7/7] apply make fixup & quality --- .../models/layoutlmv3/modeling_layoutlmv3.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/src/transformers/models/layoutlmv3/modeling_layoutlmv3.py b/src/transformers/models/layoutlmv3/modeling_layoutlmv3.py index 979c0fbcbbae9b..68987e38e9942e 100644 --- 
a/src/transformers/models/layoutlmv3/modeling_layoutlmv3.py +++ b/src/transformers/models/layoutlmv3/modeling_layoutlmv3.py @@ -1055,7 +1055,9 @@ def __init__(self, config): self.init_weights() - @add_start_docstrings_to_model_forward(LAYOUTLMV3_DOWNSTREAM_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @add_start_docstrings_to_model_forward( + LAYOUTLMV3_DOWNSTREAM_INPUTS_DOCSTRING.format("batch_size, sequence_length") + ) @replace_return_docstrings(output_type=TokenClassifierOutput, config_class=_CONFIG_FOR_DOC) def forward( self, @@ -1164,7 +1166,9 @@ def __init__(self, config): self.init_weights() - @add_start_docstrings_to_model_forward(LAYOUTLMV3_DOWNSTREAM_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @add_start_docstrings_to_model_forward( + LAYOUTLMV3_DOWNSTREAM_INPUTS_DOCSTRING.format("batch_size, sequence_length") + ) @replace_return_docstrings(output_type=QuestionAnsweringModelOutput, config_class=_CONFIG_FOR_DOC) def forward( self, @@ -1294,7 +1298,9 @@ def __init__(self, config): self.init_weights() - @add_start_docstrings_to_model_forward(LAYOUTLMV3_DOWNSTREAM_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @add_start_docstrings_to_model_forward( + LAYOUTLMV3_DOWNSTREAM_INPUTS_DOCSTRING.format("batch_size, sequence_length") + ) @replace_return_docstrings(output_type=SequenceClassifierOutput, config_class=_CONFIG_FOR_DOC) def forward( self,
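
Taken together, the series lands on plain string templating: each inputs docstring constant carries a `{0}` placeholder for the tensor shape, the call site fills it with `.format("batch_size, ...")`, and `add_start_docstrings_to_model_forward` stitches the formatted text into the `forward` method's `__doc__`. The sketch below is a minimal illustration of that pattern under stated assumptions, not the transformers implementation; `add_inputs_docstring`, `MODEL_INPUTS_DOCSTRING`, `DOWNSTREAM_INPUTS_DOCSTRING`, `ToyModel`, and `ToyClassifier` are hypothetical names standing in for the real decorator and docstring constants.

```python
# Minimal sketch of the docstring-templating pattern this series settles on.
# All names here are illustrative stand-ins, not the transformers API.

MODEL_INPUTS_DOCSTRING = r"""
Args:
    input_ids (`torch.LongTensor` of shape `({0})`):
        Indices of input sequence tokens in the vocabulary.

        Note that `sequence_length = token_sequence_length + patch_sequence_length + 1`
        where `1` is for the [CLS] token.
"""

DOWNSTREAM_INPUTS_DOCSTRING = r"""
Args:
    input_ids (`torch.LongTensor` of shape `({0})`):
        Indices of input sequence tokens in the vocabulary.
"""


def add_inputs_docstring(docstring: str):
    """Prepend an already-formatted inputs docstring to a `forward` method."""

    def decorator(fn):
        fn.__doc__ = docstring + (fn.__doc__ or "")
        return fn

    return decorator


class ToyModel:
    # The base model also embeds image patches, so its docstring is phrased in
    # terms of the text-only `token_sequence_length` and explains how the full
    # `sequence_length` is derived from it.
    @add_inputs_docstring(MODEL_INPUTS_DOCSTRING.format("batch_size, token_sequence_length"))
    def forward(self, input_ids):
        ...


class ToyClassifier:
    # Task heads are documented directly in terms of `sequence_length`,
    # with no clarification note needed.
    @add_inputs_docstring(DOWNSTREAM_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
    def forward(self, input_ids):
        ...


# The placeholder is resolved once, at class-definition time:
assert "(batch_size, token_sequence_length)" in ToyModel.forward.__doc__
assert "(batch_size, sequence_length)" in ToyClassifier.forward.__doc__
```

Compared with the intermediate `{1}` approach of patches 2 through 5, keeping two full docstring constants duplicates text but lets each variant read as a complete document, with no stray gaps where an empty clarification placeholder was substituted, which is presumably why patch 6 switches to two self-contained docstrings.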