diff --git a/collector/scripts/link.py b/collector/scripts/link.py index 6ee3f23ea..cc24a5cae 100644 --- a/collector/scripts/link.py +++ b/collector/scripts/link.py @@ -9,6 +9,7 @@ from bs4 import BeautifulSoup # Example Channel URL https://tim.blog/2022/08/09/nft-insider-trading-policy/ def link(): + totalTokens = 0 print("[NOTICE]: The first time running this process it will download supporting libraries.\n\n") fqdn_link = input("Paste in the URL of an online article or blog: ") if(len(fqdn_link) == 0): @@ -33,6 +34,7 @@ def link(): link = append_meta(req, full_text, True) if(len(full_text) > 0): + totalTokens += len(tokenize(full_text)) source = urlparse(req.url) output_filename = f"website-{source.netloc}-{source.path.replace('/','_')}.json" output_path = f"./outputs/website-logs" @@ -58,7 +60,7 @@ def link(): print(f"\n\n[Success]: article or link content fetched!") print(f"////////////////////////////") - print(f"Your estimated cost to embed this data using OpenAI's text-embedding-ada-002 model at $0.0004 / 1K tokens will cost {ada_v2_cost(tokenCount)} using {tokenCount} tokens.") + print(f"Your estimated cost to embed this data using OpenAI's text-embedding-ada-002 model at $0.0004 / 1K tokens will cost {ada_v2_cost(totalTokens)} using {totalTokens} tokens.") print(f"////////////////////////////") exit(0)