Step-by-step tutorial
First, let’s add some data:
import requests
import os
import os
# Remote PDF used as the sample document, and the local folder it is saved into.
url = 'https://www.libraryofshortstories.com/storiespdf/soldiers-home.pdf'
folder_path = '.data/example/'

# exist_ok=True creates the folder when missing and is a no-op when it already
# exists, so no separate (race-prone) existence check is needed.
os.makedirs(folder_path, exist_ok=True)
file_path = os.path.join(folder_path, 'soldiers-home.pdf')

# Without a timeout, requests waits indefinitely on an unresponsive server.
response = requests.get(url, timeout=30)
if response.status_code == 200:
    # Write the raw response bytes; the PDF must not be decoded as text.
    with open(file_path, 'wb') as file:
        file.write(response.content)
    print(f'File downloaded and saved to {file_path}')
else:
    print(f'Failed to download the file. Status code: {response.status_code}')
import cognee
from os import listdir, path
data_path = path.abspath(".data")
results = await cognee.add("file://" + file_path, "example")
for result in results:
print(result)
We can use cognee's datasets API, which is backed by DuckDB, to easily fetch the datasets we need:
# Fetch the list of datasets and show it as a whole first.
datasets = cognee.datasets.list_datasets()
print(datasets)

# Then walk the datasets one by one, printing each dataset followed by
# every item that querying it returns.
for dataset_name in datasets:
    print(dataset_name)
    data_from_dataset = cognee.datasets.query_data(dataset_name)
    for entry in data_from_dataset:
        print(entry)
And we can also interact with DuckDB directly:
import duckdb
from cognee.root_dir import get_absolute_path
# Directory that holds cognee's database file. get_absolute_path is a cognee
# helper; presumably it resolves the path against the package root — verify.
db_path = get_absolute_path("./data/.cognee_system")
db_location = os.path.join(db_path, "cognee.db")
print(db_location)

db = duckdb.connect(db_location)

# One row per distinct schema name reported by duckdb_tables().
tables = db.sql("SELECT DISTINCT schema_name FROM duckdb_tables();").df()

# Print every schema name except those ending in "staging".
print([
    schema_name
    for schema_name in tables["schema_name"]
    if not schema_name.endswith('staging')
])
Next, we can create graphs out of our datasets:
import cognee
# Run cognee's cognify step on "example" (the name passed to cognee.add
# earlier) and keep the returned value in `graph`. Per the tutorial text,
# this is the step that creates the graph.
graph = await cognee.cognify("example")
Now, it’s time to search:
from cognee.api.v1.search.search import SearchType
query_params = {
"query": "Tell me about the soldier and his home",
}
results = await cognee.search(SearchType.SIMILARITY, query_params)
for result in results:
print(result)
The search returns the following context:
[