",
20 | "temperature": 0.1,
21 | }
22 | }
23 |
24 |
def test_scraping_pipeline(graph_config: dict):
    """Run the scraping pipeline end-to-end and verify it produces a result."""
    scraper = SmartScraperGraph(
        prompt="List me all the news with their description.",
        source="https://perinim.github.io/projects",
        config=graph_config,
    )

    assert scraper.run() is not None
38 |
39 |
def test_get_execution_info(graph_config: dict):
    """Verify execution info is available after a pipeline run."""
    scraper = SmartScraperGraph(
        prompt="List me all the news with their description.",
        source="https://perinim.github.io/projects",
        config=graph_config,
    )
    scraper.run()

    assert scraper.get_execution_info() is not None
55 |
--------------------------------------------------------------------------------
/tests/graphs/smart_scraper_fireworks_test.py:
--------------------------------------------------------------------------------
1 | """
2 | Module for testing the smart scraper class
3 | """
4 |
5 | import os
6 |
7 | import pytest
8 | from dotenv import load_dotenv
9 |
10 | from scrapegraphai.graphs import SmartScraperGraph
11 |
12 | load_dotenv()
13 |
14 |
@pytest.fixture
def graph_config():
    """Build the Fireworks-backed graph configuration used by these tests."""
    api_key = os.getenv("FIREWORKS_APIKEY")
    llm_settings = {
        "api_key": api_key,
        "model": "fireworks/accounts/fireworks/models/mixtral-8x7b-instruct",
    }
    return {"llm": llm_settings, "verbose": True, "headless": False}
27 |
28 |
def test_scraping_pipeline(graph_config):
    """Run the pipeline and check a dict result comes back."""
    scraper = SmartScraperGraph(
        prompt="List me all the projects with their description.",
        source="https://perinim.github.io/projects/",
        config=graph_config,
    )
    outcome = scraper.run()

    assert outcome is not None
    assert isinstance(outcome, dict)
41 |
42 |
def test_get_execution_info(graph_config):
    """Verify execution info is recorded after a run."""
    scraper = SmartScraperGraph(
        prompt="List me all the projects with their description.",
        source="https://perinim.github.io/projects/",
        config=graph_config,
    )
    scraper.run()

    assert scraper.get_execution_info() is not None
56 |
--------------------------------------------------------------------------------
/tests/graphs/smart_scraper_multi_lite_graph_openai_test.py:
--------------------------------------------------------------------------------
1 | """
2 | Module for testing the smart scraper class
3 | """
4 |
5 | import os
6 |
7 | import pytest
8 | from dotenv import load_dotenv
9 |
10 | from scrapegraphai.graphs import SmartScraperMultiLiteGraph
11 |
12 | load_dotenv()
13 |
14 |
@pytest.fixture
def graph_config():
    """Build the OpenAI-backed graph configuration used by these tests."""
    llm_settings = {
        "api_key": os.getenv("OPENAI_APIKEY"),
        "model": "openai/gpt-3.5-turbo",
    }
    return {"llm": llm_settings, "verbose": True, "headless": False}
28 |
29 |
def test_scraping_pipeline(graph_config):
    """Run the multi-source lite pipeline and check a dict result comes back."""
    multi_graph = SmartScraperMultiLiteGraph(
        prompt="Who is ?",
        source=["https://perinim.github.io/", "https://perinim.github.io/cv/"],
        config=graph_config,
    )
    outcome = multi_graph.run()

    assert outcome is not None
    assert isinstance(outcome, dict)
42 |
43 |
def test_get_execution_info(graph_config):
    """Verify execution info is recorded after a multi-source run."""
    multi_graph = SmartScraperMultiLiteGraph(
        prompt="Who is ?",
        source=["https://perinim.github.io/", "https://perinim.github.io/cv/"],
        config=graph_config,
    )
    multi_graph.run()

    assert multi_graph.get_execution_info() is not None
57 |
--------------------------------------------------------------------------------
/tests/graphs/smart_scraper_ollama_test.py:
--------------------------------------------------------------------------------
1 | """
Module for testing the smart scraper class
3 | """
4 |
5 | import pytest
6 |
7 | from scrapegraphai.graphs import SmartScraperGraph
8 |
9 |
@pytest.fixture
def graph_config():
    """Build a graph configuration pointing at a local Ollama instance."""
    llm_settings = {
        "model": "ollama/mistral",
        "temperature": 0,
        "format": "json",
        "base_url": "http://localhost:11434",
    }
    return {"llm": llm_settings}
23 |
24 |
def test_scraping_pipeline(graph_config: dict):
    """Run the Ollama-backed scraping pipeline and verify it returns a result."""
    scraper = SmartScraperGraph(
        prompt="List me all the news with their description.",
        source="https://perinim.github.io/projects",
        config=graph_config,
    )

    assert scraper.run() is not None
38 |
39 |
def test_get_execution_info(graph_config: dict):
    """Verify execution info is available after the pipeline runs."""
    scraper = SmartScraperGraph(
        prompt="List me all the news with their description.",
        source="https://perinim.github.io/projects",
        config=graph_config,
    )
    scraper.run()

    assert scraper.get_execution_info() is not None
55 |
--------------------------------------------------------------------------------
/tests/inputs/username.csv:
--------------------------------------------------------------------------------
1 | Username; Identifier;First name;Last name
2 | booker12;9012;Rachel;Booker
3 | grey07;2070;Laura;Grey
4 | johnson81;4081;Craig;Johnson
5 | jenkins46;9346;Mary;Jenkins
6 | smith79;5079;Jamie;Smith
7 |
--------------------------------------------------------------------------------
/tests/nodes/inputs/username.csv:
--------------------------------------------------------------------------------
1 | Username; Identifier;First name;Last name
2 | booker12;9012;Rachel;Booker
3 | grey07;2070;Laura;Grey
4 | johnson81;4081;Craig;Johnson
5 | jenkins46;9346;Mary;Jenkins
6 | smith79;5079;Jamie;Smith
7 |
--------------------------------------------------------------------------------
/tests/nodes/search_internet_node_test.py:
--------------------------------------------------------------------------------
1 | import unittest
2 |
3 | from langchain_community.chat_models import ChatOllama
4 |
5 | from scrapegraphai.nodes import SearchInternetNode
6 |
7 |
class TestSearchInternetNode(unittest.TestCase):
    """Tests for SearchInternetNode backed by a local Ollama llama3 model."""

    def setUp(self):
        # Configuration for the graph
        self.graph_config = {
            "llm": {"model": "llama3", "temperature": 0, "streaming": True},
            "search_engine": "google",
            "max_results": 3,
            "verbose": True,
        }

        # Define the model. ChatOllama is a pydantic model, so settings must
        # be unpacked as keyword arguments — passing the dict positionally
        # raises a TypeError before any test can run.
        self.llm_model = ChatOllama(**self.graph_config["llm"])

        # Initialize the SearchInternetNode under test
        self.search_node = SearchInternetNode(
            input="user_input",
            output=["search_results"],
            node_config={
                "llm_model": self.llm_model,
                "search_engine": self.graph_config["search_engine"],
                "max_results": self.graph_config["max_results"],
                "verbose": self.graph_config["verbose"],
            },
        )

    def test_execute_search_node(self):
        """Execute the node on a simple query and compare the full state."""
        # Initial state
        state = {"user_input": "What is the capital of France?"}

        # Expected output (NOTE(review): exact URLs depend on live search
        # results — flaky by design; confirm against the node's contract)
        expected_output = {
            "user_input": "What is the capital of France?",
            "search_results": [
                "https://en.wikipedia.org/wiki/Paris",
                "https://en.wikipedia.org/wiki/France",
                "https://en.wikipedia.org/wiki/%C3%8Ele-de-France",
            ],
        }

        # Execute the node
        result = self.search_node.execute(state)

        # Assert the results
        self.assertEqual(result, expected_output)
52 |
53 |
# Allow running this test module directly with `python <file>`.
if __name__ == "__main__":
    unittest.main()
56 |
--------------------------------------------------------------------------------
/tests/test_depth_search_graph.py:
--------------------------------------------------------------------------------
1 | from unittest.mock import MagicMock, patch
2 |
3 | import pytest
4 |
5 | from scrapegraphai.graphs.abstract_graph import AbstractGraph
6 | from scrapegraphai.graphs.depth_search_graph import DepthSearchGraph
7 |
8 |
class TestDepthSearchGraph:
    """Test suite for DepthSearchGraph class"""

    @pytest.mark.parametrize(
        "source, expected_input_key",
        [
            ("https://example.com", "url"),
            ("/path/to/local/directory", "local_dir"),
        ],
    )
    def test_depth_search_graph_initialization(self, source, expected_input_key):
        """
        Check DepthSearchGraph picks the right input_key per source type:
        'url' for web addresses, 'local_dir' for filesystem paths.
        """
        test_prompt = "Test prompt"
        test_config = {"llm": {"model": "mock_model"}}

        # Patch out BaseGraph construction and LLM creation so no real
        # model or graph is built during initialization.
        with patch("scrapegraphai.graphs.depth_search_graph.BaseGraph"):
            with patch.object(AbstractGraph, "_create_llm", return_value=MagicMock()):
                built_graph = DepthSearchGraph(test_prompt, source, test_config)

                assert built_graph.prompt == test_prompt
                assert built_graph.source == source
                assert built_graph.config == test_config
                assert built_graph.input_key == expected_input_key
39 |
--------------------------------------------------------------------------------
/tests/test_json_scraper_multi_graph.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ScrapeGraphAI/Scrapegraph-ai/d560070e63d81b2d4097ff35e94b7fbad994c1dd/tests/test_json_scraper_multi_graph.py
--------------------------------------------------------------------------------
/tests/test_smart_scraper_multi_concat_graph.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ScrapeGraphAI/Scrapegraph-ai/d560070e63d81b2d4097ff35e94b7fbad994c1dd/tests/test_smart_scraper_multi_concat_graph.py
--------------------------------------------------------------------------------
/tests/utils/convert_to_md_test.py:
--------------------------------------------------------------------------------
1 | from scrapegraphai.utils.convert_to_md import convert_to_md
2 |
3 |
def test_basic_html_to_md():
    """Basic paragraph + heading HTML converts without returning None."""
    # NOTE(review): original literal was garbled in extraction (tags
    # stripped); reconstructed from the surviving text — confirm upstream.
    html = "<p>This is a paragraph.</p><h1>This is a heading.</h1>"
    assert convert_to_md(html) is not None
7 |
8 |
def test_html_with_links_and_images():
    """HTML containing an anchor and an image converts without returning None."""
    # NOTE(review): original literal was garbled in extraction (tags
    # stripped); reconstructed from the surviving text — confirm upstream.
    html = (
        '<p>This is a <a href="https://example.com">link</a> '
        'and this is an <img src="image.jpg" alt="image"></p>'
    )
    assert convert_to_md(html) is not None
12 |
13 |
def test_html_with_tables():
    """An HTML table converts without returning None."""
    # NOTE(review): original literal was garbled in extraction (tags
    # stripped); reconstructed from the surviving cell text — confirm upstream.
    html = """
    <table>
        <tr><th>Header 1</th><th>Header 2</th></tr>
        <tr><td>Row 1, Cell 1</td><td>Row 1, Cell 2</td></tr>
        <tr><td>Row 2, Cell 1</td><td>Row 2, Cell 2</td></tr>
    </table>
    """
    assert convert_to_md(html) is not None
23 |
24 |
def test_empty_html():
    """An empty input string still yields a non-None conversion result."""
    empty_html = ""
    assert convert_to_md(empty_html) is not None
28 |
29 |
def test_complex_html_structure():
    """A full document with headings, emphasis, lists, and links converts."""
    # NOTE(review): original literal was garbled in extraction (tags
    # stripped); reconstructed from the surviving text — confirm upstream.
    html = """
    <html>
    <body>
        <h1>Main Heading</h1>
        <p>This is a <strong>bold</strong> paragraph with <em>italic</em> text.</p>
        <ul>
            <li>First item</li>
            <li>Second item</li>
            <li>Third item</li>
        </ul>
        <p>Another paragraph with a <a href="https://example.com">link</a>.</p>
    </body>
    </html>
    """
    assert convert_to_md(html) is not None
46 |
--------------------------------------------------------------------------------
/tests/utils/parse_state_keys_test.py:
--------------------------------------------------------------------------------
1 | """
2 | Parse_state_key test module
3 | """
4 |
5 | from scrapegraphai.utils.parse_state_keys import parse_expression
6 |
7 |
def test_parse_expression():
    """parse_expression yields a non-empty result or a descriptive ValueError."""
    expression = "user_input & (relevant_chunks | parsed_document | document)"
    state = dict.fromkeys(
        ["user_input", "document", "parsed_document", "relevant_chunks"]
    )
    try:
        assert parse_expression(expression, state) != []
    except ValueError as e:
        assert "Error" in str(e)
22 |
--------------------------------------------------------------------------------
/tests/utils/research_web_test.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 | from scrapegraphai.utils.research_web import ( # Replace with actual path to your file
4 | search_on_web,
5 | )
6 |
7 |
def test_google_search():
    """Google engine returns exactly the requested number of results."""
    hits = search_on_web("test query", search_engine="Google", max_results=2)
    assert len(hits) == 2
    # Further checks on titles/snippets would need extra libraries.
13 |
14 |
def test_bing_search():
    """Bing engine returns a non-None result set."""
    hits = search_on_web("test query", search_engine="Bing", max_results=1)
    assert hits is not None
    # Domain-level assertions ('.com'/'.org') could be added later.
20 |
21 |
def test_invalid_search_engine():
    """An unsupported engine name must raise ValueError."""
    query = "test query"
    with pytest.raises(ValueError):
        search_on_web(query, search_engine="Yahoo", max_results=5)
26 |
27 |
def test_max_results():
    """A larger max_results should never yield fewer results."""
    smaller = search_on_web("test query", max_results=5)
    larger = search_on_web("test query", max_results=10)
    assert len(smaller) <= len(larger)
33 |
--------------------------------------------------------------------------------