//g' -e 's/ /dev/shm/llm-websearch.txt
51 | else
52 | #Else simply try to download the URL.
53 | #Save the webpage to a temp file and clean up the HTML tags.
54 | curl -s -L -A 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36' "${lurl}" | html2text > /dev/shm/llm-websearch.txt
55 | fi
56 |
57 | #Ask the LLM to check the text for helpful information and summarize it.
58 | pans=$(llm-python-file.py /dev/shm/llm-websearch.txt "You are a helpful research assistant." "We are trying to research \`${sterm}\` and we have a webpage with the url \`${lurl}\` that has the following text:" "Summarize the portions of the page that help answer \`${sterm}\`. Only directly summarize the text to help the research and nothing else." "0.7" | sed -e 's/\\n/ /g' -e "s/*//g" -e 's/\\//g' | tr -d '\n')
59 |
60 | #Error checking the output for page errors like 403. Commented out because it was being too aggressive; we'll let the last bot filter things until we work that out.
61 | # echeck=$(llm-python-chat.py "You are an error checking assistant. You check if the text represents a webpage error or not." "\`\`\`${pans}\`\`\`\\nOutput only \`PASS\` if the text appears to not be an error, or \`FAIL\` if the text appears to be an error message." "0.0" | sed -e 's/.*content="//g' -e "s/.*content='//g" -e 's/"\, role=.*//g' -e "s/', role=.*//g" -e 's/\\n/ /g' -e "s/*//g" -e 's/\\//g' )
62 | # if [[ "${echeck}" == *FAIL* ]] ; then
63 | # continue
64 | # fi
65 |
66 | #Outputting the info for the user's benefit.
67 | echo "${lurl} | ${ldesc} | ${pans}"
68 |
69 | #Save the output to an array for later.
70 | dans+=("${lurl} | ${ldesc} | ${pans}")
71 |
72 | fi
73 |
74 | done
75 |
76 | #Truncate the temp file to zero length so any page text left in it is gone before it is reused for the collected summaries.
77 | echo -n "" > /dev/shm/llm-websearch.txt
78 |
79 | #Loop over the dans array; each element is one "URL | description | summary" string.
80 | for dan in "${dans[@]}" ; do
81 |
82 | #Append this entry to the temp file, one entry per line.
83 | echo "${dan}" >> /dev/shm/llm-websearch.txt
84 |
85 | done
86 |
87 | #Have the LLM read the temp file, answer the search term from the combined entries, and name the single best URL overall. The sed filters replace literal \n sequences with spaces and remove asterisks and backslashes from the reply.
88 | llm-python-file.py /dev/shm/llm-websearch.txt "You are a helpful research assistant." "We are trying to research \`${sterm}\` and we have compiled the following list of URLs and their contents." "Try to answer \`${sterm}\` using information from the text. Provide a total summary of the combined information followed by what you think is the BEST URL source from the list." "0.7" | sed -e 's/\\n/ /g' -e "s/*//g" -e 's/\\//g'
89 |
90 |
--------------------------------------------------------------------------------
/llm-websearch.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jay4242/llm-websearch/9ab19feaf57476581b4d3ccb8583a2d77920de49/llm-websearch.png
--------------------------------------------------------------------------------