import os
import sys
import json
import re
import random

# === Step 1: Check if `requests` is installed ===
try:
    import requests
except ImportError:
    print("\n⚠️ `requests` module not found! Checking internet connection...\n")
    def check_connection():
        """Returns True when a single ping to bing.com succeeds on this platform."""
        try:
            if os.name == "nt":  # Windows
                return os.system("ping -n 1 bing.com >nul 2>&1") == 0
            return os.system("ping -c 1 bing.com > /dev/null 2>&1") == 0  # Linux/macOS
        except Exception:
            return False
    online = check_connection()
    if online:
        print("\n🌐 Internet detected! Installing `requests`...\n")
        os.system("pip install requests")
        try:
            import requests
            print("\n✅ `requests` installed successfully!\n")
        except ImportError:
            print("\n🚨 Installation failed! Please install manually: `pip install requests`\n")
            sys.exit(1)
    else:
        print("\n🚨 No internet detected! Running in offline mode...\n")
        class FakeRequests:
            """Offline stub: every request fails, so check_online() and the search helpers degrade gracefully."""
            def get(self, url, **kwargs):
                raise ConnectionError("Offline mode: no network access")
        requests = FakeRequests()

# === Step 2: Check if nltk is installed ===
try:
    import nltk
    from nltk.corpus import stopwords
    from nltk.tokenize import word_tokenize
except ImportError:
    print("\n⚠️ `nltk` module not found! Please install it using: pip install nltk")
    sys.exit(1)

# Download required NLTK data if not already present.
try:
    nltk.data.find('tokenizers/punkt')
except LookupError:
    nltk.download('punkt')
try:
    nltk.data.find('corpora/stopwords')
except LookupError:
    nltk.download('stopwords')
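
# Newer NLTK releases (3.8.2+) read word_tokenize's tables from 'punkt_tab' instead of 'punkt';
# fetching it as well should keep tokenization working across versions (a compatibility guess,
# hence the extra guard so older NLTK installs are unaffected).
try:
    nltk.data.find('tokenizers/punkt_tab')
except LookupError:
    try:
        nltk.download('punkt_tab')
    except Exception:
        pass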

# === NLP Processor ===
class NLPProcessor:
    def __init__(self):
        # NLTK's English stopwords, extended with a few common Chinese function words
        self.stop_words = set(stopwords.words('english')).union({"我", "的", "了", "在", "是"})
    
    def clean_text(self, text):
        """Tokenizes text, removes non-alphanumeric tokens and stopwords, and returns cleaned string."""
        tokens = word_tokenize(text.lower())
        filtered = [word for word in tokens if word.isalnum() and word not in self.stop_words]
        return " ".join(filtered)

# === Thinking Chain System ===
class ThinkingChain:
    def reason_step_by_step(self, question):
        """Provides step-by-step reasoning for complex questions."""
        q = question.lower()
        if q.startswith("why"):
            return ("Let's break down the causes step by step: first, consider the underlying factors; "
                    "next, see how these factors interact; then, we reach the conclusion.")
        elif q.startswith("how") and "are you" not in q:
            return ("Let's analyze it in stages: first, identify the main components; then, examine how they work together; "
                    "finally, synthesize the overall process.")
        else:
            return ""
    
    def suggest_followup(self, answer):
        """Suggests a follow-up question to deepen the conversation."""
        return "Does that answer your question? Would you like to discuss this further?"

# === Analyzer System ===
class Analyzer:
    def break_down(self, text):
        """Splits text into sentences and returns the first few key points."""
        sentences = re.split(r'(?<=[.!?])\s+', text)
        key_points = [s for s in sentences if len(s) > 20]
        return key_points[:3]
    
    def compare_sources(self, res1, res2):
        """Compares two texts and returns the longer summary if they are similar."""
        words1 = set(res1.lower().split())
        words2 = set(res2.lower().split())
        common = words1.intersection(words2)
        sim = len(common) / max(len(words1), len(words2)) if words1 and words2 else 0
        if sim > 0.6:
            return res1 if len(res1) >= len(res2) else res2
        else:
            return f"Source 1: {res1}\nSource 2: {res2}"
    
    def analyze(self, topic, bing_res, yandex_res):
        """Analyzes and merges search results, then breaks them down into key points."""
        if not bing_res and not yandex_res:
            return "No reliable information found online."
        if bing_res and yandex_res:
            merged = self.compare_sources(bing_res, yandex_res)
        elif bing_res:
            merged = bing_res
        else:
            merged = yandex_res
        breakdown = self.break_down(merged)
        summary = ". ".join(breakdown)
        return summary if summary else merged

# === CGG-EUSO Chatbot ===
class CGG_EUSO:
    def __init__(self):
        self.knowledge_file = "knowledge.json"
        self.responses_file = "responses.json"
        self.nlp_processor = NLPProcessor()
        self.thinking_chain = ThinkingChain()
        self.analyzer = Analyzer()
        self.load_data()
        # Predefined greetings that trigger casual chat
        self.greeting_phrases = {"hi", "hello", "hey", "yo", "good morning", "good afternoon", "good evening"}
        # Predefined casual responses (personality)
        self.casual_responses = [
            "I'm doing great, thanks for asking!",
            "All good here! How about you?",
            "I'm here and ready to chat!",
            "Hey there! What's up?"
        ]

    def load_data(self):
        """Loads stored knowledge and responses from files."""
        try:
            with open(self.knowledge_file, "r", encoding="utf-8") as f:
                self.knowledge = json.load(f)
        except FileNotFoundError:
            self.knowledge = {}
        try:
            with open(self.responses_file, "r", encoding="utf-8") as f:
                self.responses = json.load(f)
        except FileNotFoundError:
            self.responses = {}

    def save_data(self):
        """Saves learned knowledge and responses to files."""
        with open(self.knowledge_file, "w", encoding="utf-8") as f:
            json.dump(self.knowledge, f, ensure_ascii=False, indent=4)
        with open(self.responses_file, "w", encoding="utf-8") as f:
            json.dump(self.responses, f, ensure_ascii=False, indent=4)
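
    # Both files are flat JSON objects mapping a question/topic string to its stored text,
    # e.g. {"what is python": "Python is a programming language ..."} (illustrative contents).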

    def check_online(self):
        """Checks if the computer is online by pinging Bing."""
        try:
            requests.get("http://www.bing.com", timeout=5)
            return True
        except Exception:
            return False

    # --- Basic NLP Preprocessing ---
    def nlp_preprocess(self, text):
        """Cleans text using our NLP processor."""
        text = re.sub(r"<.*?>", " ", text)  # Remove HTML tags
        text = re.sub(r"\s+", " ", text)      # Remove extra whitespace
        return self.nlp_processor.clean_text(text)

    # --- Summarize text ---
    def summarize_text(self, text, max_len=300):
        """Returns a summary of text up to max_len characters."""
        if len(text) > max_len:
            return text[:max_len] + "..."
        return text
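
    # Illustrative example (not executed): summarize_text("a" * 400) returns the first
    # 300 characters followed by "...".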

    # --- Multi-Source Searching ---
    def search_bing(self, query):
        """Fetches a Bing results page for the query and returns a cleaned, summarized version."""
        try:
            # Let requests URL-encode the query; a browser-like User-Agent makes blocking less likely.
            resp = requests.get(
                "https://www.bing.com/search",
                params={"q": query},
                headers={"User-Agent": "Mozilla/5.0"},
                timeout=5,
            )
            if resp is not None and resp.text:
                cleaned = self.nlp_preprocess(resp.text)
                return self.summarize_text(cleaned)
        except Exception:
            pass
        return ""

    def search_yandex(self, query):
        """Fetches a Yandex results page for the query and returns a cleaned, summarized version."""
        try:
            # Yandex has no free public API, so this scrapes the HTML results page like search_bing.
            resp = requests.get(
                "https://yandex.com/search/",
                params={"text": query},
                headers={"User-Agent": "Mozilla/5.0"},
                timeout=5,
            )
            if resp is not None and resp.text:
                cleaned = self.nlp_preprocess(resp.text)
                return self.summarize_text(cleaned)
        except Exception:
            pass
        return ""

    def fact_check(self, query):
        """
        Searches both Bing and Yandex, then uses the Analyzer and Thinking Chain
        to generate a well-reasoned, summarized answer.
        """
        bing_res = self.search_bing(query)
        yandex_res = self.search_yandex(query)
        analyzed = self.analyzer.analyze(query, bing_res, yandex_res)
        reasoning = self.thinking_chain.reason_step_by_step(query)
        if reasoning:
            return f"{reasoning}\n\n{analyzed}"
        return analyzed
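
    # Illustrative flow (not executed): fact_check("why is the sky blue") prepends the
    # ThinkingChain "why" template to whatever key points the Analyzer extracts from the
    # Bing/Yandex results (or to "No reliable information found online." when both are empty).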

    # --- Knowledge & Response Storage ---
    def learn_knowledge(self, topic, info):
        """Stores new knowledge if it's not already present or if the new info is more detailed."""
        if topic not in self.knowledge or len(info) > len(self.knowledge[topic]):
            self.knowledge[topic] = info
            self.save_data()

    def learn_response(self, question, answer):
        """Stores a direct response for a given query."""
        if question not in self.responses:
            self.responses[question] = answer
            self.save_data()

    # --- Predict & Suggest Related Questions ---
    def predict_questions(self, user_input):
        """Returns stored questions that contain the input or overlap with it enough to seem related."""
        suggestions = []
        for question in self.knowledge:
            if user_input.lower() in question.lower():
                suggestions.append(question)
            else:
                sim = self.similarity(user_input, question)
                if sim > 0.3:
                    suggestions.append(question)
        return suggestions
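
    # Illustrative example (not executed): with stored questions {"what is python", "what is java"},
    # predict_questions("python") suggests "what is python" via the substring match.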

    def similarity(self, text1, text2):
        """Computes a simple similarity score based on word overlap."""
        words1 = set(text1.lower().split())
        words2 = set(text2.lower().split())
        if not words1 or not words2:
            return 0
        common = words1.intersection(words2)
        return len(common) / max(len(words1), len(words2))
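
    # Illustrative example (not executed): similarity("what is python", "what is java")
    # -> 2 shared words / 3 unique words per text ≈ 0.67.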

    # --- Natural Chat (Casual Conversation) ---
    def casual_chat(self, user_input):
        """
        Handles casual conversation using a set of personality-based responses.
        This function will respond naturally to greetings and opinion-based queries.
        """
        # For greetings, return a random greeting response.
        if user_input.lower() in self.greeting_phrases:
            return random.choice(self.casual_responses)
        # For other casual inputs, give a friendly, open-ended reply.
        return "That's interesting! Tell me more about that."

    # --- Main Respond Method ---
    def respond(self, user_input):
        """
        Generates a response:
          - If a direct response is stored, returns it.
          - Else if knowledge exists, returns that.
          - Else, if the input looks factual (contains a question word such as what, who, when, where, why, how)
            and we're online, it fact-checks and uses the Thinking Chain to generate an answer.
          - Otherwise, it engages in casual chat.
          - If offline and unknown, it asks the user to teach it a response.
        """
        # If the input is a greeting or casual, handle it without searching.
        lower_input = user_input.lower()
        factual_keywords = {"what", "who", "when", "where", "why", "how"}
        is_factual = any(word in lower_input.split() for word in factual_keywords)
        # Special case: "how are you" is casual.
        if "how are you" in lower_input:
            is_factual = False

        if lower_input in self.greeting_phrases:
            return random.choice(self.casual_responses)

        if not is_factual:
            # For non-factual inputs, use casual chat.
            return self.casual_chat(user_input)

        # If a direct response is stored, return it.
        if user_input in self.responses:
            return self.responses[user_input]

        # If stored knowledge exists, return it.
        if user_input in self.knowledge:
            return f"I found this in my knowledge: {self.knowledge[user_input]}"

        # If unknown and online, search the web.
        if self.check_online():
            answer = self.fact_check(user_input)
            if answer and "No reliable information found online." not in answer:
                self.learn_knowledge(user_input, answer)
            else:
                answer = "No reliable information found online."
        else:
            print("\n🤔 I don't know this yet. Please teach me!")
            new_response = input("Enter a response: ")
            self.learn_response(user_input, new_response)
            return "Got it! I'll remember this."

        # After answering, suggest related questions (excluding the one just asked and learned).
        suggestions = [s for s in self.predict_questions(user_input) if s != user_input]
        if suggestions:
            answer += "\n\nYou might also be interested in these related topics:\n" + "\n".join(suggestions)
        return answer
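
    # Illustrative flow (not executed): respond("hello") -> a random casual reply;
    # respond("what is gravity") -> a stored answer, cached knowledge, a freshly searched
    # summary (cached for next time), or a request to be taught when offline.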

    def show_code(self):
        """Displays stored knowledge and responses for debugging."""
        print("\n📜 **CGG-EUSO Code Check Mode** 📜")
        print("\nKnowledge stored:")
        print(json.dumps(self.knowledge, ensure_ascii=False, indent=4))
        print("\nResponses stored:")
        print(json.dumps(self.responses, ensure_ascii=False, indent=4))

# === Step 3: Run the Chatbot ===
# The __main__ guard lets the classes above be imported without starting the CLI.
if __name__ == "__main__":
    bot = CGG_EUSO()

    print("\n💬 CGG-EUSO is ready! Choose a mode:")
    print("1️⃣ Chat Mode")
    print("2️⃣ Code Check Mode")

    while True:
        mode = input("\nSelect (1/2): ").strip()
        if mode == "1":
            print("\n💬 Enter 'exit' to quit.\n")
            while True:
                user_input = input("You: ")
                if user_input.lower() == "exit":
                    print("\nGoodbye! 👋")
                    break
                response = bot.respond(user_input)
                print(f"CGG-EUSO: {response}")
            break
        elif mode == "2":
            bot.show_code()
            break
        else:
            print("\n❌ Invalid choice! Please enter '1' or '2'.")