[Free Giveaway✨] Keyword Filter for Topic Relevance (AI-powered)

GosuSEO

Junior Member
Joined
Nov 11, 2022
Messages
154
Reaction score
73
import openai
import os
from translate import Translator
import concurrent.futures
from tqdm import tqdm
import backoff
import requests.exceptions

# Set your API key
api_key = "Your OpenAI API Key"

def read_keywords(file_path):
    """Lazily yield keywords from *file_path*, one per line.

    Each yielded value has leading/trailing whitespace (including the
    newline) stripped. The file is read as UTF-8 and closed on exhaustion.
    """
    with open(file_path, "r", encoding="utf-8") as handle:
        yield from (raw.strip() for raw in handle)

def is_english(text):
    """Return True when *text* is pure ASCII (treated as "English" here).

    Non-ASCII input triggers a UnicodeEncodeError on the ascii encode,
    which we interpret as "not English".
    """
    try:
        text.encode('ascii')
    except UnicodeEncodeError:
        return False
    return True

def translate_to_english(text):
    """Translate *text* into English via the `translate` package.

    A fresh Translator is built per call; returns the translated string.
    """
    return Translator(to_lang="en").translate(text)

# BUG FIX: the original decorator only retried requests.exceptions.RequestException,
# but the pre-1.0 openai client raises its own openai.error.* exceptions (e.g.
# RateLimitError, APIError), so API failures were never actually retried.
@backoff.on_exception(
    backoff.expo,
    (requests.exceptions.RequestException, openai.error.OpenAIError),
    max_tries=10,
)
def is_keyword_related(topic, keyword):
    """Ask GPT-3 whether *keyword* is related to *topic*.

    Parameters:
        topic:   topic description string, interpolated into the prompt.
        keyword: candidate keyword to classify.

    Returns:
        True when the model's answer starts with "yes", False otherwise.

    Raises:
        openai.error.OpenAIError / requests.exceptions.RequestException
        after 10 failed attempts (exponential backoff between tries).
    """
    openai.api_key = api_key

    prompt = f"Given the topic '{topic}', is the keyword '{keyword}' related to the topic? (yes or no, only say Yes or no and you must not say anything else.)\n"
    response = openai.Completion.create(
        engine="text-davinci-003",
        prompt=prompt,
        max_tokens=10,
        n=1,
        stop=None,
        # temperature 0: a yes/no classification should be deterministic;
        # the original 0.7 made results non-reproducible.
        temperature=0,
    )

    output = response.choices[0].text.strip().lower()
    # Prefix match instead of equality: the model sometimes answers
    # "yes." or "yes," despite the prompt, which the original missed.
    return output.startswith("yes")

def process_keyword(keyword):
    """Classify one keyword against the module-level *topic*.

    Non-ASCII keywords are first translated to English. Prints the
    verdict for progress visibility. Returns the (possibly translated)
    keyword when related, otherwise None.
    """
    if not is_english(keyword):
        keyword = translate_to_english(keyword)

    related = is_keyword_related(topic, keyword)
    print(f"Keyword: {keyword}, Is related? {'Yes' if related else 'No'}")

    return keyword if related else None

# Define your topic
topic = "Search Engine Optimization(SEO)"

# Load your keyword list line by line
keyword_generator = list(read_keywords("keyword_plastic_surgery.txt"))

# Process keywords concurrently (thread_count workers; I/O-bound API calls).
filtered_keywords = []
thread_count = 20  # original comment claimed 100 threads; the code uses 20
batch_size = 100
batch_count = 1
# Paths of batch files actually written. BUG FIX: the original combine step
# iterated range(1, batch_count + 1) and crashed with FileNotFoundError
# whenever the result count was an exact multiple of batch_size (the final
# batch file was never written). Tracking real paths avoids that entirely.
batch_files = []


def _write_batch(keywords, count):
    """Write one batch of filtered keywords to disk and record its path."""
    path = f"filtered_keywords_batch_{count}.txt"
    with open(path, "w", encoding="utf-8") as file:
        for kw in keywords:
            file.write(kw + "\n")
    batch_files.append(path)


with concurrent.futures.ThreadPoolExecutor(max_workers=thread_count) as executor:
    futures = {executor.submit(process_keyword, keyword): keyword for keyword in keyword_generator}

    for future in tqdm(concurrent.futures.as_completed(futures), total=len(futures), desc="Processing keywords"):
        result = future.result()
        if result is not None:
            filtered_keywords.append(result)

        # Save the filtered keywords to a new .txt file every batch_size results
        if len(filtered_keywords) == batch_size:
            _write_batch(filtered_keywords, batch_count)
            filtered_keywords = []
            batch_count += 1

# Save the remaining filtered keywords to the final batch file
if filtered_keywords:
    _write_batch(filtered_keywords, batch_count)

# Combine all batch files into one file
with open("filtered_keywords_combined.txt", "w", encoding="utf-8") as combined_file:
    for batch_path in batch_files:
        with open(batch_path, "r", encoding="utf-8") as batch_file:
            for line in batch_file:
                combined_file.write(line)
        # Delete the batch file after combining
        os.remove(batch_path)

print("All filtered keywords combined into filtered_keywords_combined.txt")
The code is a script that filters a list of keywords from a text file to retain only those that are related to a specific topic, in this case, "Search Engine Optimization (SEO)". The code performs the following tasks:

  1. Reads keywords from a text file line by line.
  2. Checks if the keyword is in English, and if not, translates it to English.
  3. Determines if the keyword is related to the topic using OpenAI's GPT-3 model (text-davinci-003 engine). The function is_keyword_related uses the GPT-3 API to evaluate the relationship between the keyword and the topic. It also implements exponential backoff to handle request exceptions.
  4. Processes the keywords using a ThreadPoolExecutor with 20 threads to concurrently process the keywords, improving efficiency.
  5. Writes the filtered keywords to multiple batch files, with a specified batch size (100 in this case).
  6. Combines all batch files into a single "filtered_keywords_combined.txt" file and deletes the individual batch files.
The script requires the following libraries: openai, translate, concurrent.futures, tqdm, and backoff.
The script can be costly: it uses GPT-3, and it took about $25 in credits to sanitize 30k keywords.
It does produce some false positives, but it is very good at filtering out unrelated keywords.

Fund me if you're good! ETH|ARB: 0x4a25FADB34870E0e7ecF6f3AF5EC7066B6Ebc690
 
Back
Top