import os
import sys
import json
import re
import random
# --- Helper: Automatic Package Installation ---
def install_package(package_name, mirror_url):
    """Automatically installs a package using the given PyPI mirror."""
    print(f"\n🌐 Installing {package_name} using mirror {mirror_url} ...\n")
    # Use sys.executable so the package is installed into the interpreter
    # that is actually running this script, not whatever 'python' is on PATH.
    os.system(f"{sys.executable} -m pip install {package_name} -i {mirror_url}")
# --- Step 1: Check and install 'requests' if necessary ---
try:
    import requests
except ImportError:
    install_package("requests", "https://pypi.tuna.tsinghua.edu.cn/simple")
    try:
        import requests
    except ImportError:
        print("\n🚨 Failed to install requests. Please install manually.\n")
        sys.exit(1)
# --- Step 2: Check and install 'nltk' if necessary ---
try:
    import nltk
    from nltk.corpus import stopwords
    from nltk.tokenize import word_tokenize
except ImportError:
    install_package("nltk", "https://pypi.tuna.tsinghua.edu.cn/simple")
    try:
        import nltk
        from nltk.corpus import stopwords
        from nltk.tokenize import word_tokenize
    except ImportError:
        print("\n🚨 Failed to install nltk. Please install manually.\n")
        sys.exit(1)

# Download required NLTK data if not present.
try:
    nltk.data.find('tokenizers/punkt')
except LookupError:
    nltk.download('punkt')
try:
    nltk.data.find('corpora/stopwords')
except LookupError:
    nltk.download('stopwords')
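# Note: recent NLTK releases (3.8.2 and later) load word_tokenize data from the
# separate 'punkt_tab' resource. A hedged extra check, harmless on older versions:
try:
    nltk.data.find('tokenizers/punkt_tab')
except LookupError:
    nltk.download('punkt_tab')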
# --- Step 3: Check and install 'transformers' and 'torch' if necessary ---
try:
    from transformers import GPT2LMHeadModel, GPT2Tokenizer
except ImportError:
    install_package("transformers", "https://pypi.tuna.tsinghua.edu.cn/simple")
    install_package("torch", "https://pypi.tuna.tsinghua.edu.cn/simple")
    try:
        from transformers import GPT2LMHeadModel, GPT2Tokenizer
    except ImportError:
        print("\n🚨 Failed to install transformers/torch. Please install manually.\n")
        sys.exit(1)
# --- NLP Processor ---
class NLPProcessor:
    def __init__(self):
        # Extend NLTK's English stopwords with some basic Chinese stopwords.
        self.stop_words = set(stopwords.words('english')).union({"我", "的", "了", "在", "是"})

    def clean_text(self, text):
        """Tokenizes text, then removes non-alphanumeric tokens and stopwords."""
        tokens = word_tokenize(text.lower())
        filtered = [word for word in tokens if word.isalnum() and word not in self.stop_words]
        return " ".join(filtered)
# --- Thinking Chain System ---
class ThinkingChain:
    def reason_step_by_step(self, question):
        """Provides step-by-step reasoning for complex questions."""
        q = question.lower()
        if q.startswith("why"):
            return ("Let's break down the causes step by step: first, consider the underlying factors; "
                    "next, see how these factors interact; then, we reach the conclusion.")
        elif q.startswith("how") and "are you" not in q:
            return ("Let's analyze it in stages: first, identify the main components; then, examine their interactions; "
                    "finally, synthesize the overall process.")
        else:
            return ""

    def suggest_followup(self, answer):
        """Suggests a follow-up question."""
        return "Does that answer your question? Would you like to discuss this further?"
# --- Analyzer System ---
class Analyzer:
    def break_down(self, text):
        """Splits text into sentences and returns up to three key points."""
        sentences = re.split(r'(?<=[.!?])\s+', text)
        key_points = [s for s in sentences if len(s) > 20]
        return key_points[:3]

    def compare_sources(self, res1, res2):
        """Compares two texts; returns the longer one if they are similar, else both."""
        words1 = set(res1.lower().split())
        words2 = set(res2.lower().split())
        common = words1.intersection(words2)
        sim = len(common) / max(len(words1), len(words2)) if words1 and words2 else 0
        if sim > 0.6:
            return res1 if len(res1) >= len(res2) else res2
        else:
            return f"Source 1: {res1}\nSource 2: {res2}"
    def analyze(self, topic, bing_res, yandex_res):
        """Merges and summarizes search results."""
        if not bing_res and not yandex_res:
            return "No reliable information found online."
        if bing_res and yandex_res:
            merged = self.compare_sources(bing_res, yandex_res)
        elif bing_res:
            merged = bing_res
        else:
            merged = yandex_res
        breakdown = self.break_down(merged)
        summary = ". ".join(breakdown)
        return summary if summary else merged
# --- Offline LLM using GPT-2 ---
class OfflineLLM:
    def __init__(self):
        try:
            self.tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
            self.model = GPT2LMHeadModel.from_pretrained("gpt2")
        except Exception as e:
            print("Error loading GPT-2 model:", e)
            self.tokenizer = None
            self.model = None

    def generate_response(self, prompt, max_length=100):
        """Generates a response using GPT-2."""
        if self.tokenizer is None or self.model is None:
            return ""
        try:
            input_ids = self.tokenizer.encode(prompt, return_tensors="pt")
            outputs = self.model.generate(
                input_ids, max_length=max_length, num_return_sequences=1,
                no_repeat_ngram_size=2, do_sample=True, top_p=0.95, top_k=50,
                pad_token_id=self.tokenizer.eos_token_id  # GPT-2 has no pad token; reuse EOS to avoid warnings
            )
            response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
            return response
        except Exception:
            return ""
# --- CGG-EUSO Chatbot ---
class CGG_EUSO:
    def __init__(self):
        self.knowledge_file = "knowledge.json"
        self.responses_file = "responses.json"
        self.nlp_processor = NLPProcessor()
        self.thinking_chain = ThinkingChain()
        self.analyzer = Analyzer()
        self.llm = OfflineLLM()  # Offline LLM for casual conversation
        self.load_data()
        # Predefined greetings for casual conversation
        self.greeting_phrases = {"hi", "hello", "hey", "yo", "good morning", "good afternoon", "good evening"}
        # Fallback casual responses if the LLM fails
        self.casual_responses = [
            "I'm doing great, thanks for asking!",
            "All good here! How about you?",
            "I'm here and ready to chat!",
            "Hey there! What's up?"
        ]
    def load_data(self):
        """Loads stored knowledge and responses from JSON files."""
        try:
            with open(self.knowledge_file, "r", encoding="utf-8") as f:
                self.knowledge = json.load(f)
        except (FileNotFoundError, json.JSONDecodeError):
            self.knowledge = {}
        try:
            with open(self.responses_file, "r", encoding="utf-8") as f:
                self.responses = json.load(f)
        except (FileNotFoundError, json.JSONDecodeError):
            self.responses = {}

    def save_data(self):
        """Saves learned knowledge and responses to JSON files."""
        with open(self.knowledge_file, "w", encoding="utf-8") as f:
            json.dump(self.knowledge, f, ensure_ascii=False, indent=4)
        with open(self.responses_file, "w", encoding="utf-8") as f:
            json.dump(self.responses, f, ensure_ascii=False, indent=4)
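    # Both files are flat string-to-string JSON maps; e.g. knowledge.json might
    # contain (hypothetical entry): {"what is python": "python programming language ..."}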
    def check_online(self):
        """Checks connectivity by issuing a lightweight HTTP request to Bing."""
        try:
            requests.get("http://www.bing.com", timeout=5)
            return True
        except Exception:
            return False

    # --- Basic NLP Preprocessing ---
    def nlp_preprocess(self, text):
        """Cleans text using our NLP processor."""
        text = re.sub(r"<.*?>", " ", text)  # Remove HTML tags
        text = re.sub(r"\s+", " ", text)    # Collapse extra whitespace
        return self.nlp_processor.clean_text(text)

    # --- Summarize text ---
    def summarize_text(self, text, max_len=300):
        """Returns the text truncated to at most max_len characters."""
        if len(text) > max_len:
            return text[:max_len] + "..."
        return text
    # --- Multi-Source Searching ---
    def search_bing(self, query):
        """Searches Bing for the query."""
        try:
            # Pass the query via params so requests URL-encodes it safely.
            resp = requests.get("https://www.bing.com/search", params={"q": query}, timeout=5)
            if resp and resp.text:
                cleaned = self.nlp_preprocess(resp.text)
                return self.summarize_text(cleaned)
        except Exception:
            pass
        return ""

    def search_yandex(self, query):
        """Searches Yandex for the query."""
        try:
            resp = requests.get("https://yandex.com/search/", params={"text": query}, timeout=5)
            if resp and resp.text:
                cleaned = self.nlp_preprocess(resp.text)
                return self.summarize_text(cleaned)
        except Exception:
            pass
        return ""
    def fact_check(self, query):
        """
        Searches both Bing and Yandex, then uses the Analyzer and Thinking Chain
        to generate a well-reasoned, summarized answer.
        """
        bing_res = self.search_bing(query)
        yandex_res = self.search_yandex(query)
        analyzed = self.analyzer.analyze(query, bing_res, yandex_res)
        reasoning = self.thinking_chain.reason_step_by_step(query)
        if reasoning:
            return f"{reasoning}\n\n{analyzed}"
        return analyzed
# --- Knowledge & Response Storage ---
def learn_knowledge(self, topic, info):
"""Stores new knowledge if not already present or if new info is more detailed."""
if topic not in self.knowledge or len(info) > len(self.knowledge[topic]):
self.knowledge[topic] = info
self.save_data()
def learn_response(self, question, answer):
"""Stores a direct response for a given query."""
if question not in self.responses:
self.responses[question] = answer
self.save_data()
# --- Predict & Suggest Related Questions ---
def predict_questions(self, user_input):
suggestions = []
for question in self.knowledge:
if user_input.lower() in question.lower():
suggestions.append(question)
else:
sim = self.similarity(user_input, question)
if sim > 0.3:
suggestions.append(question)
return suggestions
def similarity(self, text1, text2):
"""Computes a simple similarity score based on word overlap."""
words1 = set(text1.lower().split())
words2 = set(text2.lower().split())
if not words1 or not words2:
return 0
common = words1.intersection(words2)
return len(common) / max(len(words1), len(words2))
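    # Worked example (illustrative): similarity("what is python", "what is java")
    # -> |{"what", "is"}| / max(3, 3) = 2 / 3 ≈ 0.67.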
    # --- Casual Chat using Offline LLM ---
    def generate_llm_response(self, prompt, max_length=100):
        """Generates a natural conversation response by delegating to the offline LLM (GPT-2)."""
        return self.llm.generate_response(prompt, max_length)
    def casual_chat(self, user_input):
        """
        Handles casual conversation using the offline LLM.
        If the LLM fails, falls back to a predefined casual response.
        """
        prompt = f"User said: '{user_input}'. Respond in a friendly, casual tone."
        llm_response = self.generate_llm_response(prompt)
        if llm_response:
            return llm_response
        else:
            return random.choice(self.casual_responses)
    # --- Main Respond Method ---
    def respond(self, user_input):
        """
        Generates a response:
        - For factual queries (identified by question keywords), it searches online via fact_check.
        - For casual or greeting queries, it uses casual_chat.
        - If a stored response or knowledge entry exists, it returns that.
        - If offline and the query is unknown, it prompts the user to teach it.
        - Also suggests related topics.
        """
        lower_input = user_input.lower()
        # "how" is included so the "how are you" exception below has an effect.
        factual_keywords = {"what", "who", "when", "where", "why", "how"}
        is_factual = any(word in lower_input.split() for word in factual_keywords)
        if "how are you" in lower_input:
            is_factual = False
        # If greeting or non-factual, handle via casual chat.
        if lower_input in self.greeting_phrases or not is_factual:
            return self.casual_chat(user_input)
        if user_input in self.responses:
            return self.responses[user_input]
        if user_input in self.knowledge:
            return f"I found this in my knowledge: {self.knowledge[user_input]}"
        if self.check_online():
            answer = self.fact_check(user_input)
            if answer and "No reliable information found online." not in answer:
                self.learn_knowledge(user_input, answer)
            else:
                answer = "No reliable information found online."
        else:
            print("\n🤔 I don't know this yet. Please teach me!")
            new_response = input("Enter a response: ")
            self.learn_response(user_input, new_response)
            return "Got it! I'll remember this."
        suggestions = self.predict_questions(user_input)
        if suggestions:
            answer += "\n\nYou might also be interested in these related topics:\n" + "\n".join(suggestions)
        return answer
    def show_code(self):
        """Displays stored knowledge and responses for debugging."""
        print("\n📜 **CGG-EUSO Code Check Mode** 📜")
        print("\nKnowledge stored:")
        print(json.dumps(self.knowledge, ensure_ascii=False, indent=4))
        print("\nResponses stored:")
        print(json.dumps(self.responses, ensure_ascii=False, indent=4))
# --- Step 4: Run the Chatbot ---
if __name__ == "__main__":
    bot = CGG_EUSO()
    print("\n💬 CGG-EUSO is ready! Choose a mode:")
    print("1️⃣ Chat Mode")
    print("2️⃣ Code Check Mode")
    while True:
        mode = input("\nSelect (1/2): ").strip()
        if mode == "1":
            print("\n💬 Enter 'exit' to quit.\n")
            while True:
                user_input = input("You: ")
                if user_input.lower() == "exit":
                    print("\nGoodbye! 👋")
                    break
                response = bot.respond(user_input)
                print(f"CGG-EUSO: {response}")
            break
        elif mode == "2":
            bot.show_code()
            break
        else:
            print("\n❌ Invalid choice! Please enter '1' or '2'.")