Blog Detail

BC20270371's blog
Gbugbhubhubgyvjygviyygj
BC20270371 @ 2025-3-25 10:41:23
import os
import sys
import json
import re
import random

# --- Helper: Automatic Package Installation ---
def install_package(package_name, mirror_url):
    """Automatically installs a package using a given mirror."""
    print(f"\n🌐 Installing {package_name} using mirror {mirror_url} ...\n")
    os.system(f"python -m pip install {package_name} -i {mirror_url}")

# --- Step 1: Check and install 'requests' if necessary ---
try:
    import requests
except ImportError:
    install_package("requests", "https://pypi.tuna.tsinghua.edu.cn/simple")
    try:
        import requests
    except ImportError:
        print("\n🚨 Failed to install requests. Please install manually.\n")
        sys.exit(1)

# --- Step 2: Check and install 'nltk' if necessary ---
try:
    import nltk
    from nltk.corpus import stopwords
    from nltk.tokenize import word_tokenize
except ImportError:
    install_package("nltk", "https://pypi.tuna.tsinghua.edu.cn/simple")
    try:
        import nltk
        from nltk.corpus import stopwords
        from nltk.tokenize import word_tokenize
    except ImportError:
        print("\n🚨 Failed to install nltk. Please install manually.\n")
        sys.exit(1)

# Download required NLTK data if not present.
try:
    nltk.data.find('tokenizers/punkt')
except LookupError:
    nltk.download('punkt')
try:
    nltk.data.find('corpora/stopwords')
except LookupError:
    nltk.download('stopwords')

# --- Step 3: Check and install 'transformers' and 'torch' if necessary ---
try:
    from transformers import GPT2LMHeadModel, GPT2Tokenizer
except ImportError:
    install_package("transformers", "https://pypi.tuna.tsinghua.edu.cn/simple")
    install_package("torch", "https://pypi.tuna.tsinghua.edu.cn/simple")
    try:
        from transformers import GPT2LMHeadModel, GPT2Tokenizer
    except ImportError:
        print("\n🚨 Failed to install transformers/torch. Please install manually.\n")
        sys.exit(1)

# --- NLP Processor ---
class NLPProcessor:
    def __init__(self):
        # Extend NLTK's stopwords with some basic Chinese stopwords.
        self.stop_words = set(stopwords.words('english')).union({"我", "的", "了", "在", "是"})
    
    def clean_text(self, text):
        """Tokenizes text, removes non-alphanumeric tokens and stopwords."""
        tokens = word_tokenize(text.lower())
        filtered = [word for word in tokens if word.isalnum() and word not in self.stop_words]
        return " ".join(filtered)

# --- Thinking Chain System ---
class ThinkingChain:
    def reason_step_by_step(self, question):
        """Provides step-by-step reasoning for complex questions."""
        q = question.lower()
        if q.startswith("why"):
            return ("Let's break down the causes step by step: first, consider the underlying factors; "
                    "next, see how these factors interact; then, we reach the conclusion.")
        elif q.startswith("how") and "are you" not in q:
            return ("Let's analyze it in stages: first, identify the main components; then, examine their interactions; "
                    "finally, synthesize the overall process.")
        else:
            return ""
    
    def suggest_followup(self, answer):
        """Suggests a follow-up question."""
        return "Does that answer your question? Would you like to discuss this further?"

# --- Analyzer System ---
class Analyzer:
    def break_down(self, text):
        """Splits text into sentences and returns key points."""
        sentences = re.split(r'(?<=[.!?])\s+', text)
        key_points = [s for s in sentences if len(s) > 20]
        return key_points[:3]
    
    def compare_sources(self, res1, res2):
        """Compares two texts and returns the better summary if similar."""
        words1 = set(res1.lower().split())
        words2 = set(res2.lower().split())
        common = words1.intersection(words2)
        sim = len(common) / max(len(words1), len(words2)) if words1 and words2 else 0
        if sim > 0.6:
            return res1 if len(res1) >= len(res2) else res2
        else:
            return f"Source 1: {res1}\nSource 2: {res2}"
    
    def analyze(self, topic, bing_res, yandex_res):
        """Merges and summarizes search results."""
        if not bing_res and not yandex_res:
            return "No reliable information found online."
        if bing_res and yandex_res:
            merged = self.compare_sources(bing_res, yandex_res)
        elif bing_res:
            merged = bing_res
        else:
            merged = yandex_res
        breakdown = self.break_down(merged)
        summary = ". ".join(breakdown)
        return summary if summary else merged

# --- Offline LLM using GPT-2 ---
class OfflineLLM:
    def __init__(self):
        try:
            self.tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
            self.model = GPT2LMHeadModel.from_pretrained("gpt2")
        except Exception as e:
            print("Error loading GPT-2 model:", e)
            self.tokenizer = None
            self.model = None

    def generate_response(self, prompt, max_length=100):
        """Generates a response using GPT-2."""
        if self.tokenizer is None or self.model is None:
            return ""
        try:
            input_ids = self.tokenizer.encode(prompt, return_tensors="pt")
            outputs = self.model.generate(
                input_ids, max_length=max_length, num_return_sequences=1,
                no_repeat_ngram_size=2, do_sample=True, top_p=0.95, top_k=50
            )
            response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
            return response
        except Exception:
            return ""

# --- CGG-EUSO Chatbot ---
class CGG_EUSO:
    def __init__(self):
        self.knowledge_file = "knowledge.json"
        self.responses_file = "responses.json"
        self.nlp_processor = NLPProcessor()
        self.thinking_chain = ThinkingChain()
        self.analyzer = Analyzer()
        self.llm = OfflineLLM()  # Offline LLM for casual conversation
        self.load_data()
        # Predefined greetings for casual conversation
        self.greeting_phrases = {"hi", "hello", "hey", "yo", "good morning", "good afternoon", "good evening"}
        # Fallback casual responses if LLM fails
        self.casual_responses = [
            "I'm doing great, thanks for asking!",
            "All good here! How about you?",
            "I'm here and ready to chat!",
            "Hey there! What's up?"
        ]

    def load_data(self):
        """Loads stored knowledge and responses from JSON files."""
        try:
            with open(self.knowledge_file, "r", encoding="utf-8") as f:
                self.knowledge = json.load(f)
        except FileNotFoundError:
            self.knowledge = {}
        try:
            with open(self.responses_file, "r", encoding="utf-8") as f:
                self.responses = json.load(f)
        except FileNotFoundError:
            self.responses = {}

    def save_data(self):
        """Saves learned knowledge and responses to JSON files."""
        with open(self.knowledge_file, "w", encoding="utf-8") as f:
            json.dump(self.knowledge, f, ensure_ascii=False, indent=4)
        with open(self.responses_file, "w", encoding="utf-8") as f:
            json.dump(self.responses, f, ensure_ascii=False, indent=4)

    def check_online(self):
        """Checks if the computer is online by pinging Bing."""
        try:
            requests.get("http://www.bing.com", timeout=5)
            return True
        except Exception:
            return False

    # --- Basic NLP Preprocessing ---
    def nlp_preprocess(self, text):
        """Cleans text using our NLP processor."""
        text = re.sub(r"<.*?>", " ", text)  # Remove HTML tags
        text = re.sub(r"\s+", " ", text)      # Remove extra whitespace
        return self.nlp_processor.clean_text(text)

    # --- Summarize text ---
    def summarize_text(self, text, max_len=300):
        """Returns a summary of text up to max_len characters."""
        if len(text) > max_len:
            return text[:max_len] + "..."
        return text

    # --- Multi-Source Searching ---
    def search_bing(self, query):
        """Searches Bing for the query."""
        try:
            resp = requests.get(f"https://www.bing.com/search?q={query}", timeout=5)
            if resp and resp.text:
                cleaned = self.nlp_preprocess(resp.text)
                return self.summarize_text(cleaned)
        except Exception:
            pass
        return ""

    def search_yandex(self, query):
        """Searches Yandex for the query."""
        try:
            resp = requests.get(f"https://yandex.com/search/?text={query}", timeout=5)
            if resp and resp.text:
                cleaned = self.nlp_preprocess(resp.text)
                return self.summarize_text(cleaned)
        except Exception:
            pass
        return ""

    def fact_check(self, query):
        """
        Searches both Bing and Yandex, then uses the Analyzer and Thinking Chain
        to generate a well-reasoned, summarized answer.
        """
        bing_res = self.search_bing(query)
        yandex_res = self.search_yandex(query)
        analyzed = self.analyzer.analyze(query, bing_res, yandex_res)
        reasoning = self.thinking_chain.reason_step_by_step(query)
        if reasoning:
            return f"{reasoning}\n\n{analyzed}"
        return analyzed

    # --- Knowledge & Response Storage ---
    def learn_knowledge(self, topic, info):
        """Stores new knowledge if not already present or if new info is more detailed."""
        if topic not in self.knowledge or len(info) > len(self.knowledge[topic]):
            self.knowledge[topic] = info
            self.save_data()

    def learn_response(self, question, answer):
        """Stores a direct response for a given query."""
        if question not in self.responses:
            self.responses[question] = answer
            self.save_data()

    # --- Predict & Suggest Related Questions ---
    def predict_questions(self, user_input):
        suggestions = []
        for question in self.knowledge:
            if user_input.lower() in question.lower():
                suggestions.append(question)
            else:
                sim = self.similarity(user_input, question)
                if sim > 0.3:
                    suggestions.append(question)
        return suggestions

    def similarity(self, text1, text2):
        """Computes a simple similarity score based on word overlap."""
        words1 = set(text1.lower().split())
        words2 = set(text2.lower().split())
        if not words1 or not words2:
            return 0
        common = words1.intersection(words2)
        return len(common) / max(len(words1), len(words2))

    # --- Casual Chat using Offline LLM ---
    def generate_llm_response(self, prompt, max_length=100):
        """Generates a natural conversation response using the offline LLM (GPT-2)."""
        if self.llm.tokenizer is None or self.llm.model is None:
            return ""
        try:
            input_ids = self.llm.tokenizer.encode(prompt, return_tensors="pt")
            outputs = self.llm.model.generate(
                input_ids, max_length=max_length, num_return_sequences=1,
                no_repeat_ngram_size=2, do_sample=True, top_p=0.95, top_k=50
            )
            response = self.llm.tokenizer.decode(outputs[0], skip_special_tokens=True)
            return response
        except Exception:
            return ""

    def casual_chat(self, user_input):
        """
        Handles casual conversation using the offline LLM.
        If the LLM fails, falls back to predefined casual responses.
        """
        prompt = f"User said: '{user_input}'. Respond in a friendly, casual tone."
        llm_response = self.generate_llm_response(prompt)
        if llm_response:
            return llm_response
        else:
            return random.choice(self.casual_responses)

    # --- Main Respond Method ---
    def respond(self, user_input):
        """
        Generates a response:
         - For factual queries (identified by keywords), it searches online via fact_check.
         - For casual or greeting queries, it uses casual_chat.
         - If a stored response exists, it returns that.
         - If offline and unknown, it prompts the user to teach it.
         - Also suggests related topics.
        """
        lower_input = user_input.lower()
        factual_keywords = {"what", "who", "when", "where", "why"}
        is_factual = any(word in lower_input.split() for word in factual_keywords)
        if "how are you" in lower_input:
            is_factual = False

        # If greeting or non-factual, handle via casual chat.
        if lower_input in self.greeting_phrases or not is_factual:
            return self.casual_chat(user_input)

        if user_input in self.responses:
            return self.responses[user_input]

        if user_input in self.knowledge:
            return f"I found this in my knowledge: {self.knowledge[user_input]}"

        if self.check_online():
            answer = self.fact_check(user_input)
            if answer and "No reliable information found online." not in answer:
                self.learn_knowledge(user_input, answer)
            else:
                answer = "No reliable information found online."
        else:
            print("\n🤔 I don't know this yet. Please teach me!")
            new_response = input("Enter a response: ")
            self.learn_response(user_input, new_response)
            return "Got it! I'll remember this."

        suggestions = self.predict_questions(user_input)
        if suggestions:
            answer += "\n\nYou might also be interested in these related topics:\n" + "\n".join(suggestions)
        return answer

    def show_code(self):
        """Displays stored knowledge and responses for debugging."""
        print("\n📜 **CGG-EUSO Code Check Mode** 📜")
        print("\nKnowledge stored:")
        print(json.dumps(self.knowledge, ensure_ascii=False, indent=4))
        print("\nResponses stored:")
        print(json.dumps(self.responses, ensure_ascii=False, indent=4))

# --- Step 7: Run the Chatbot ---
bot = CGG_EUSO()

print("\n💬 CGG-EUSO is ready! Choose a mode:")
print("1️⃣ Chat Mode")
print("2️⃣ Code Check Mode")

while True:
    mode = input("\nSelect (1/2): ").strip()
    if mode == "1":
        print("\n💬 Enter 'exit' to quit.\n")
        while True:
            user_input = input("You: ")
            if user_input.lower() == "exit":
                print("\nGoodbye! 👋")
                break
            response = bot.respond(user_input)
            print(f"CGG-EUSO: {response}")
        break
    elif mode == "2":
        bot.show_code()
        break
    else:
        print("\n❌ Invalid choice! Please enter '1' or '2'.")
Gbugbhubhubgyvjygviyygj

Status

Development

Support

Gbugbhubhubgyvjygviyygj

Status

Development

Support

Don't have an account?

SIGN IN