Two Step Edits #530

Draft · wants to merge 17 commits into main
10 changes: 10 additions & 0 deletions mentat/config.py
@@ -98,6 +98,16 @@ class Config:
},
converter=converters.optional(converters.to_bool),
)
two_step_edits: bool = attr.field(
default=False,
metadata={
"description": (
"Experimental feature that uses multiple LLM calls to make and parse"
" edits"
),
"auto_completions": bool_autocomplete,
},
)
revisor: bool = attr.field(
default=False,
metadata={
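For anyone trying this branch: a minimal sketch of opting in programmatically, assuming (as elsewhere in this `Config`) that every attrs field carries a default, so keyword overrides suffice:

```python
# Minimal sketch: opting in to the experimental two-step flow.
# Assumes all other Config fields have defaults, as they do in this diff.
from mentat.config import Config

config = Config(two_step_edits=True)
assert config.two_step_edits is True
```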
103 changes: 22 additions & 81 deletions mentat/conversation.py
@@ -15,17 +15,16 @@
ChatCompletionUserMessageParam,
)

from mentat.llm_api_handler import (
TOKEN_COUNT_WARNING,
count_tokens,
get_max_tokens,
prompt_tokens,
)
from mentat.llm_api_handler import count_tokens, get_max_tokens, prompt_tokens
from mentat.parsers.file_edit import FileEdit
from mentat.parsers.parser import ParsedLLMResponse
from mentat.session_context import SESSION_CONTEXT
from mentat.stream_model_response import (
get_two_step_system_prompt,
stream_model_response,
stream_model_response_two_step,
)
from mentat.transcripts import ModelMessage, TranscriptMessage, UserMessage
from mentat.utils import add_newline


class MentatAssistantMessageParam(ChatCompletionAssistantMessageParam):
@@ -171,7 +170,10 @@ def get_messages(
return _messages
else:
parser = config.parser
prompt = parser.get_system_prompt()
if session_context.config.two_step_edits:
prompt = get_two_step_system_prompt()
else:
prompt = parser.get_system_prompt()
prompt_message: ChatCompletionMessageParam = (
ChatCompletionSystemMessageParam(
role="system",
@@ -184,78 +186,6 @@ def clear_messages(self) -> None:
"""Clears the messages in the conversation"""
self._messages = list[ChatCompletionMessageParam]()

async def _stream_model_response(
self,
messages: list[ChatCompletionMessageParam],
) -> ParsedLLMResponse:
session_context = SESSION_CONTEXT.get()
stream = session_context.stream
code_file_manager = session_context.code_file_manager
config = session_context.config
parser = config.parser
llm_api_handler = session_context.llm_api_handler
cost_tracker = session_context.cost_tracker

stream.send(
None,
channel="loading",
)
response = await llm_api_handler.call_llm_api(
messages,
config.model,
stream=True,
response_format=parser.response_format(),
)
stream.send(
None,
channel="loading",
terminate=True,
)

num_prompt_tokens = prompt_tokens(messages, config.model)
stream.send(f"Total token count: {num_prompt_tokens}", style="info")
if num_prompt_tokens > TOKEN_COUNT_WARNING:
stream.send(
"Warning: LLM performance drops off rapidly at large context sizes. Use"
" /clear to clear context or use /exclude to exclude any uneccessary"
" files.",
style="warning",
)

stream.send("Streaming... use control-c to interrupt the model at any point\n")
async with parser.interrupt_catcher():
parsed_llm_response = await parser.stream_and_parse_llm_response(
add_newline(response)
)
# Sampler and History require previous_file_lines
for file_edit in parsed_llm_response.file_edits:
file_edit.previous_file_lines = code_file_manager.file_lines.get(
file_edit.file_path, []
)
if not parsed_llm_response.interrupted:
cost_tracker.display_last_api_call()
else:
# Generator doesn't log the api call if we interrupt it
cost_tracker.log_api_call_stats(
num_prompt_tokens,
count_tokens(
parsed_llm_response.full_response, config.model, full_message=False
),
config.model,
display=True,
)

messages.append(
ChatCompletionAssistantMessageParam(
role="assistant", content=parsed_llm_response.full_response
)
)
self.add_model_message(
parsed_llm_response.full_response, messages, parsed_llm_response
)

return parsed_llm_response

async def get_model_response(self) -> ParsedLLMResponse:
Contributor:

The get_model_response method has been significantly simplified by moving the logic for streaming model responses into the stream_model_response.py file. This is a good refactor as it reduces the complexity of the Conversation class and adheres to the single responsibility principle. However, it's important to ensure that all functionalities related to model response handling are thoroughly tested in their new location to prevent any regressions.

session_context = SESSION_CONTEXT.get()
stream = session_context.stream
@@ -285,7 +215,10 @@ async def get_model_response(self) -> ParsedLLMResponse:
)

try:
response = await self._stream_model_response(messages_snapshot)
if session_context.config.two_step_edits:
response = await stream_model_response_two_step(messages_snapshot)
else:
response = await stream_model_response(messages_snapshot)
except RateLimitError:
stream.send(
"Rate limit error received from OpenAI's servers using model"
@@ -294,6 +227,14 @@
style="error",
)
return ParsedLLMResponse("", "", list[FileEdit]())

messages_snapshot.append(
ChatCompletionAssistantMessageParam(
role="assistant", content=response.full_response
)
)
self.add_model_message(response.full_response, messages_snapshot, response)

return response

def remaining_context(self) -> int | None:
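To make the dispatch above easier to review: a hedged sketch of the three-call shape that `stream_model_response_two_step` appears to implement, pieced together from the new prompt files below. `call_llm` and `read_file` are illustrative stand-ins, not the PR's API; the real code in `stream_model_response.py` streams responses and tracks costs.

```python
import json

async def two_step_edits_sketch(call_llm, read_file, messages):
    # 1. First call: answer the user with plain-English edit instructions
    #    (two_step_edit_prompt.txt supplies the system prompt).
    instructions = await call_llm(messages)

    # 2. Second call: extract which files those instructions edit; the model
    #    is told to answer with JSON like {"files": ["mentat/config.py"]}.
    listing = await call_llm([
        {"role": "system", "content": "<two_step_edit_prompt_list_files.txt>"},
        {"role": "user", "content": instructions},
    ])
    files = json.loads(listing).get("files", [])

    # 3. One call per file: rewrite the whole file, fenced in ``` by the model.
    rewritten_files = []
    for path in files:
        fenced = await call_llm([
            {"role": "system", "content": "<two_step_edit_prompt_rewrite_file.txt>"},
            {"role": "user", "content": read_file(path)},
            {"role": "user", "content": instructions},
        ])
        lines = fenced.strip().splitlines()
        if lines and lines[0].startswith("```"):  # tolerate ```python etc.
            lines = lines[1:]
        if lines and lines[-1].strip() == "```":
            lines = lines[:-1]
        rewritten_files.append((path, "\n".join(lines) + "\n"))
    return rewritten_files
```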
1 change: 1 addition & 0 deletions mentat/parsers/parser.py
@@ -35,6 +35,7 @@ class ParsedLLMResponse:
full_response: str = attr.field()
conversation: str = attr.field()
file_edits: list[FileEdit] = attr.field()
rewritten_files: list[tuple[str, str]] = attr.field(factory=list)
interrupted: bool = attr.field(default=False)


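A quick usage sketch of the widened `ParsedLLMResponse`. Note the new field needs a default (hence `factory=list` above), or the three-argument fallback `ParsedLLMResponse("", "", list[FileEdit]())` in conversation.py would raise a `TypeError`:

```python
from mentat.parsers.parser import ParsedLLMResponse

# Two-step flow: no structured FileEdits, just whole-file rewrites keyed by path.
response = ParsedLLMResponse(
    full_response="Rewrote mentat/config.py as requested.",
    conversation="Rewrote mentat/config.py as requested.",
    file_edits=[],
    rewritten_files=[("mentat/config.py", "# new file contents\n")],
)
```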
14 changes: 14 additions & 0 deletions mentat/resources/prompts/two_step_edit_prompt.txt
@@ -0,0 +1,14 @@
**You are now operating within an advanced AI coding system designed to assist with code modifications and enhancements.**

Upon receiving context, which may range from specific code snippets to entire repositories, you will be tasked with addressing coding requests or answering questions.

**For your responses:**

- **Directly address the request or question:** Provide concise instructions for any code modifications, clearly stating what changes need to be made.
- **Specify modifications without reiterating existing code:** Guide the user on where and how to make modifications, e.g., "insert the new code block above the last function in the file" or "replace the existing loop condition with the provided snippet." Ensure instructions are clear without displaying how the entire file looks post-modification.
- **Use the full file path at least once per file with edits:** When mentioning a file for the first time, use its full path. You can refer to it by a shorter name afterward if it remains clear which file you're discussing.
- **Avoid suggesting non-actionable edits:** Do not recommend commenting out or non-specific removals. Be explicit about what to delete or change, referring to code blocks or functions by name and avoiding extensive verbatim rewrites.
- **Minimize the inclusion of unchanged code:** Focus on the new or altered lines rather than embedding them within large blocks of unchanged code. Your guidance should be clear enough for an intelligent actor to implement with just the changes specified.
- **Emphasize brevity and clarity:** Once you've provided detailed instructions for the edits, there's no need for further elaboration. Avoid concluding with summaries of how the code will look after the edits.

**Your guidance should empower users to confidently implement the suggested changes with minimal and precise directions, fostering an efficient and clear modification process.**
10 changes: 10 additions & 0 deletions mentat/resources/prompts/two_step_edit_prompt_list_files.txt
@@ -0,0 +1,10 @@
You are part of an expert AI coding system.

The next message will be an answer to a user's question or request. It may include suggested edits to code files. Your job is to extract the names of the files that need to be edited, according to that message.

In your response:
- respond in JSON, with a single key "files" whose value is an array of strings
- return an empty array if no files have suggested edits, e.g. {"files":[]}
- the message may mention files without suggesting edits to them; do not include these. Only include files that have suggested edits
- if a file is meant to be created, include it in the list of files to edit
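
Since the prompt pins the reply to a single "files" key, the consuming side can stay small and defensive. A hypothetical helper, not code from this PR:

```python
import json

def parse_file_list(response_text: str) -> list[str]:
    # The model is instructed to reply like {"files": ["path/a.py"]} or
    # {"files": []}. Treat anything malformed as "no suggested edits"
    # rather than crashing the session.
    try:
        files = json.loads(response_text).get("files", [])
    except (json.JSONDecodeError, AttributeError):
        return []
    return [f for f in files if isinstance(f, str)]
```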

11 changes: 11 additions & 0 deletions mentat/resources/prompts/two_step_edit_prompt_rewrite_file.txt
@@ -0,0 +1,11 @@
You are part of an expert AI coding system.

In the next message you will be given the contents of a code file. The user will then specify some edits to be made to the file.

Your response should:
- rewrite the entire file, including all the requested edits
- wrap your entire response in ```
- do not include anything else in your response other than the code
- do not make any changes to the code other than the requested edits, even to standardize formatting
- even formatting changes should not be made unless explicitly requested by the user
- if a change is not fully specified, do your best to follow the spirit of what was asked
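
Because the model must wrap the entire rewrite in ```, the caller has to unwrap exactly one fence, tolerating a language tag after the opening backticks. A hedged sketch of that unwrapping, not the PR's actual parsing code:

```python
def strip_code_fence(response_text: str) -> str:
    # Drop one surrounding ``` fence if present; the opening fence may carry
    # a language tag such as ```python. Unfenced text is returned unchanged.
    lines = response_text.strip().splitlines()
    if len(lines) >= 2 and lines[0].startswith("```") and lines[-1].strip() == "```":
        lines = lines[1:-1]
    return "\n".join(lines) + "\n"
```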