Source code for oscopilot.modules.base_module
import re
import json
import os
from oscopilot.utils.llms import OpenAI,LLAMA
# from oscopilot.environments.py_env import PythonEnv
# from oscopilot.environments.py_jupyter_env import PythonJupyterEnv
from oscopilot.environments import Env
from oscopilot.utils import get_os_version
from dotenv import load_dotenv
# Load settings from the local '.env' file at import time.
# NOTE(review): override=True is expected to let values from the file replace
# variables already present in the process environment — confirm against the
# python-dotenv version in use.
load_dotenv(dotenv_path='.env', override=True)
# Name of the LLM backend; BaseModule.__init__ compares it against "OpenAI"
# and "LLAMA". os.getenv returns None when the variable is not set.
MODEL_TYPE = os.getenv('MODEL_TYPE')
[docs]
class BaseModule:
    """
    Base class that bundles an LLM client, an execution environment and a few
    helpers for pulling structured data out of free-form text.
    """

    def __init__(self):
        """
        Initializes a new instance of BaseModule with default values for its attributes.

        The LLM backend is chosen from the module-level ``MODEL_TYPE`` value:
        ``"OpenAI"`` selects ``OpenAI()`` and ``"LLAMA"`` selects ``LLAMA()``.
        For any other value (including ``None``) ``self.llm`` is set to ``None``
        so that the attribute always exists and can be checked by callers.
        """
        if MODEL_TYPE == "OpenAI":
            self.llm = OpenAI()
        elif MODEL_TYPE == "LLAMA":
            self.llm = LLAMA()
        else:
            # Unknown or missing backend: keep the attribute defined instead of
            # leaving it unset, which would only surface later as an
            # AttributeError far away from the actual cause.
            self.llm = None
        self.environment = Env()
        self.system_version = get_os_version()

    def extract_information(self, message, begin_str='[BEGIN]', end_str='[END]'):
        """
        Extracts substrings from a message that are enclosed within specified begin and end markers.

        The end marker is always searched for *after* its begin marker, so a
        stray end marker that precedes a begin marker is ignored. Leading
        newlines are stripped from every extracted substring.

        Args:
            message (str): The message from which information is to be extracted.
            begin_str (str): The marker indicating the start of the information to be extracted.
            end_str (str): The marker indicating the end of the information to be extracted.

        Returns:
            list[str]: A list of extracted substrings found between the begin and end markers.

        Raises:
            ValueError: If ``begin_str`` or ``end_str`` is empty (an empty marker
                matches everywhere and the scan could never advance).
        """
        if not begin_str or not end_str:
            raise ValueError("begin_str and end_str must be non-empty strings.")

        result = []
        cursor = 0  # scan position; avoids re-slicing the message each round
        while True:
            begin = message.find(begin_str, cursor)
            if begin == -1:
                break
            content_start = begin + len(begin_str)
            # Only an end marker located after the begin marker closes the span.
            end = message.find(end_str, content_start)
            if end == -1:
                break
            result.append(message[content_start:end].lstrip("\n"))
            cursor = end + len(end_str)
        return result

    def extract_json_from_string(self, text):
        """
        Identifies and extracts JSON data embedded within a given string.

        This method searches for JSON data within a string, specifically looking for
        JSON objects inside a fenced block opened with ```json and closed with ```.
        It attempts to parse and return the first JSON object found. Both
        pretty-printed and compact single-line objects are accepted.

        Args:
            text (str): The text containing the JSON data to be extracted.

        Returns:
            dict: The parsed JSON data as a dictionary if successful.
            str: An error message indicating a parsing error or that no JSON data was found.
        """
        # The object itself is captured as group 1, so the fences never need to
        # be stripped by string replacement (which would also remove triple
        # backticks that legitimately occur inside JSON string values).
        json_regex = r'```json\s*(\{[\s\S]*?\})\s*```'
        match = re.search(json_regex, text)
        if match is None:
            return "No JSON data found in the string."

        try:
            return json.loads(match.group(1))
        except json.JSONDecodeError as e:
            return f"Error parsing JSON data: {e}"

    def extract_list_from_string(self, text):
        """
        Extracts a list of task descriptions from a given string containing enumerated tasks.

        Only the text on the same line as a numbered bullet is captured. An item
        is recognised when its line is followed by the next numbered item, by a
        blank line, or by the end of the string (with or without a trailing
        newline).

        Parameters:
            text (str): A string containing multiple enumerated tasks. Each task is numbered and followed by its description.

        Returns:
            list[str]: A list of strings, each representing the description of a task extracted from the input string.
        """
        # Regular expression pattern:
        # \d+\.      one or more digits followed by a dot (the task number).
        # \s+        whitespace after the dot.
        # ([^\n]*?)  the task description: any characters except newlines (non-greedy).
        # (?=...)    lookahead for what may follow the description:
        #              \n\d+\.  the next numbered task,
        #              \n\Z     a final newline at the end of the string,
        #              \n\n     a blank line,
        #              \Z       the end of the string without a trailing newline
        #                       (otherwise the last item would be dropped).
        task_pattern = r'\d+\.\s+([^\n]*?)(?=\n\d+\.|\n\Z|\n\n|\Z)'
        return re.findall(task_pattern, text)