├── .envrc ├── backend ├── vector_store │ ├── .envrc │ ├── .gitignore │ ├── build.rs │ ├── .sqlx │ │ ├── query-63012c5ed8ef1bd8c3c931294767c96a5eae22e960264ab1a6b4b9e957c3c20c.json │ │ ├── query-e1166c8c3bde787b1b07787d9a30ec58575597e8223d57d8d02e43944b7d5cae.json │ │ ├── query-65f1c255cf0c5df5476c730aea2a9448783ca002a83452a3f245a82b79371652.json │ │ ├── query-7060a1b2757a7c942deadc8b86ccc6ea26bbfb5e24db6dfbdea16ebb88c79319.json │ │ ├── query-1bac47c3b9f3a187ee440c83f2c22106f0bfdb8d7865eec2d49a0585da7b21ca.json │ │ ├── query-07ff355f2f2a14bf0a163f3cb6450f57baa285962f0e9fefe0c8d6a5084e7d0a.json │ │ └── query-b67fb1612df114fa58878cc079a5c912765be487fb0d98985d68e6f2dcf4f885.json │ ├── Dockerfile │ ├── Cargo.toml │ ├── README.md │ ├── migrations │ │ └── 0001_initial.sql │ └── src │ │ ├── populate.rs │ │ ├── embedding.rs │ │ ├── main.rs │ │ └── db.rs ├── db-manager │ ├── doc │ │ └── intro.md │ ├── src │ │ ├── statistics │ │ │ ├── .gitignore │ │ │ ├── utils.clj │ │ │ └── core.clj │ │ ├── db_manager │ │ │ ├── cache.clj │ │ │ ├── routes.clj │ │ │ ├── core.clj │ │ │ └── db.clj │ │ ├── exam_scraper │ │ │ └── core.clj │ │ └── course_scraper │ │ │ ├── upsert.clj │ │ │ └── watcher.clj │ ├── .gitignore │ ├── Dockerfile │ ├── test │ │ └── db_manager │ │ │ └── core_test.clj │ ├── README.md │ ├── CHANGELOG.md │ └── project.clj └── rust_parser │ ├── Dockerfile │ ├── Cargo.toml │ ├── src │ ├── parser │ │ ├── workload_information.rs │ │ ├── content_serialiser.rs │ │ ├── exam_information.rs │ │ └── logistic_information.rs │ ├── main.rs │ └── parser.rs │ └── Cargo.lock ├── frontend ├── .npmrc ├── static │ ├── robots.txt │ ├── favicon.ico │ └── assets │ │ ├── og-image.png │ │ └── preview.png ├── postcss.config.js ├── vite.config.ts ├── .gitignore ├── Dockerfile ├── src │ ├── components │ │ ├── Changelog │ │ │ ├── ChangelogButton.svelte │ │ │ ├── store.ts │ │ │ └── ChangelogModal.svelte │ │ ├── GradeGraph │ │ │ ├── sample.json │ │ │ └── GradeGraph.svelte │ │ ├── SideCard.svelte │ │ ├── 
CheckboxMenu.svelte │ │ ├── BigCheckbox.svelte │ │ ├── Footer │ │ │ └── Footer.svelte │ │ ├── Loader │ │ │ └── Loader.svelte │ │ └── OverviewCard │ │ │ └── OverviewCard.svelte │ ├── assets │ │ ├── Dk.svelte │ │ ├── CloseCross.svelte │ │ ├── Gb.svelte │ │ ├── MenuIcon.svelte │ │ ├── Github.svelte │ │ └── Facebook.svelte │ ├── app.d.ts │ ├── app.html │ ├── app.css │ ├── routes │ │ ├── +layout.svelte │ │ ├── sitemap.xml │ │ │ └── +server.ts │ │ └── course │ │ │ └── [courseId] │ │ │ ├── +page.server.ts │ │ │ └── +page.svelte │ ├── theme.ts │ ├── stores.ts │ └── course.ts ├── svelte.config.js ├── .prettierrc.json ├── eslint.config.js ├── tsconfig.json ├── README.md ├── package.json └── tailwind.config.js ├── .gitignore ├── assets └── showcase.gif ├── .gitattributes ├── .github └── workflows │ ├── docker-ci.yml │ ├── deploy.yml │ ├── rust-ci.yml │ └── frontend-ci.yml ├── .pre-commit-config.yaml ├── LICENSE ├── flake.nix ├── docker-compose.yml ├── CONTRIBUTING.md ├── README.md └── flake.lock /.envrc: -------------------------------------------------------------------------------- 1 | use flake 2 | -------------------------------------------------------------------------------- /backend/vector_store/.envrc: -------------------------------------------------------------------------------- 1 | use flake 2 | -------------------------------------------------------------------------------- /backend/vector_store/.gitignore: -------------------------------------------------------------------------------- 1 | .fastembed_cache 2 | -------------------------------------------------------------------------------- /frontend/.npmrc: -------------------------------------------------------------------------------- 1 | engine-strict=true 2 | resolution-mode=highest 3 | -------------------------------------------------------------------------------- /frontend/static/robots.txt: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Allow: / 3 | 
Disallow: /api 4 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | data/* 2 | .direnv 3 | .lsp 4 | .clj-kondo 5 | target 6 | log.txt 7 | -------------------------------------------------------------------------------- /assets/showcase.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joshniemela/KU-courses/HEAD/assets/showcase.gif -------------------------------------------------------------------------------- /frontend/static/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joshniemela/KU-courses/HEAD/frontend/static/favicon.ico -------------------------------------------------------------------------------- /frontend/static/assets/og-image.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joshniemela/KU-courses/HEAD/frontend/static/assets/og-image.png -------------------------------------------------------------------------------- /frontend/static/assets/preview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joshniemela/KU-courses/HEAD/frontend/static/assets/preview.png -------------------------------------------------------------------------------- /frontend/postcss.config.js: -------------------------------------------------------------------------------- 1 | export default { 2 | plugins: { 3 | tailwindcss: {}, 4 | autoprefixer: {}, 5 | }, 6 | }; 7 | -------------------------------------------------------------------------------- /backend/db-manager/doc/intro.md: -------------------------------------------------------------------------------- 1 | # Introduction to db-manager 2 | 3 | TODO: write [great 
documentation](http://jacobian.org/writing/what-to-write/) 4 | -------------------------------------------------------------------------------- /frontend/vite.config.ts: -------------------------------------------------------------------------------- 1 | import { sveltekit } from "@sveltejs/kit/vite"; 2 | import { defineConfig } from "vite"; 3 | 4 | export default defineConfig({ 5 | plugins: [sveltekit()], 6 | }); 7 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | frontend/src/assets/* linguist-vendored 2 | frontend/src/theme.js linguist-vendored 3 | backend/rust_parser/test_data/pages/* linguist-vendored 4 | backend/employed/* linguist-vendored 5 | -------------------------------------------------------------------------------- /frontend/.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | node_modules 3 | /build 4 | /.svelte-kit 5 | /package 6 | .env 7 | .env.* 8 | !.env.example 9 | vite.config.js.timestamp-* 10 | vite.config.ts.timestamp-* 11 | -------------------------------------------------------------------------------- /backend/vector_store/build.rs: -------------------------------------------------------------------------------- 1 | // generated by `sqlx migrate build-script` 2 | fn main() { 3 | // trigger recompilation when a new migration is added 4 | println!("cargo:rerun-if-changed=migrations"); 5 | } 6 | -------------------------------------------------------------------------------- /backend/db-manager/src/statistics/.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | /classes 3 | /checkouts 4 | profiles.clj 5 | pom.xml 6 | pom.xml.asc 7 | *.jar 8 | *.class 9 | /.lein-* 10 | /.nrepl-port 11 | /.prepl-port 12 | .hgignore 13 | .hg/ 14 | 
-------------------------------------------------------------------------------- /backend/db-manager/.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | /classes 3 | /checkouts 4 | profiles.clj 5 | pom.xml 6 | pom.xml.asc 7 | *.jar 8 | *.class 9 | /.lein-* 10 | /.nrepl-port 11 | /.prepl-port 12 | .hgignore 13 | .hg/ 14 | .calva 15 | .lsp 16 | .clj-kondo 17 | -------------------------------------------------------------------------------- /frontend/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM node:22.6.0-alpine3.19 AS build 2 | 3 | WORKDIR /app 4 | 5 | COPY package*.json ./ 6 | RUN npm install 7 | COPY . ./ 8 | RUN npm run build 9 | RUN npm prune --omit=dev 10 | 11 | EXPOSE 5000 12 | ENV HOST=0.0.0.0 13 | ENV PORT=5000 14 | CMD ["node", "./build/index.js"] 15 | 16 | -------------------------------------------------------------------------------- /frontend/src/components/Changelog/ChangelogButton.svelte: -------------------------------------------------------------------------------- 1 | 4 | 5 | 12 | -------------------------------------------------------------------------------- /frontend/src/assets/Dk.svelte: -------------------------------------------------------------------------------- 1 | 6 | 7 | 8 | 9 | 10 | -------------------------------------------------------------------------------- /frontend/src/app.d.ts: -------------------------------------------------------------------------------- 1 | // See https://kit.svelte.dev/docs/types#app 2 | // for information about these interfaces 3 | declare global { 4 | namespace App { 5 | // interface Error {} 6 | // interface Locals {} 7 | // interface PageData {} 8 | // interface Platform {} 9 | } 10 | } 11 | 12 | export {}; 13 | -------------------------------------------------------------------------------- /backend/rust_parser/Dockerfile: 
-------------------------------------------------------------------------------- 1 | FROM rust:slim 2 | 3 | ENV TARGET x86_64-unknown-linux-musl 4 | 5 | RUN rustup target add $TARGET 6 | 7 | COPY . /app 8 | 9 | WORKDIR /app 10 | 11 | RUN cargo build --release --locked --target "$TARGET" 12 | 13 | 14 | 15 | # run the target 16 | RUN chmod +x /app/target/x86_64-unknown-linux-musl/release/rust_parser 17 | -------------------------------------------------------------------------------- /frontend/svelte.config.js: -------------------------------------------------------------------------------- 1 | import adapter from "@sveltejs/adapter-node"; 2 | import { vitePreprocess } from "@sveltejs/vite-plugin-svelte"; 3 | 4 | /** @type {import('@sveltejs/kit').Config} */ 5 | const config = { 6 | preprocess: vitePreprocess(), 7 | 8 | kit: { 9 | adapter: adapter(), 10 | }, 11 | }; 12 | export default config; 13 | -------------------------------------------------------------------------------- /frontend/.prettierrc.json: -------------------------------------------------------------------------------- 1 | { 2 | "trailingComma": "es5", 3 | "tabWidth": 4, 4 | "arrowParens": "always", 5 | "bracketSpacing": true, 6 | "endOfLine": "lf", 7 | "printWidth": 80, 8 | "singleQuote": false, 9 | "svelteStrictMode": true, 10 | "svelteAllowShorthand": false, 11 | "plugins": ["prettier-plugin-svelte"] 12 | } 13 | -------------------------------------------------------------------------------- /frontend/src/app.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | %sveltekit.head% 7 | 8 | 9 |
%sveltekit.body%
10 | 11 | 12 | -------------------------------------------------------------------------------- /frontend/src/components/GradeGraph/sample.json: -------------------------------------------------------------------------------- 1 | [ 2 | { "grade": "12", "count": 9 }, 3 | { "grade": "10", "count": 15 }, 4 | { "grade": "7", "count": 7 }, 5 | { "grade": "4", "count": 4 }, 6 | { "grade": "02", "count": 3 }, 7 | { "grade": "00", "count": 2 }, 8 | { "grade": "-3", "count": 0 }, 9 | { "grade": "Ej m\u00f8dt", "count": 9 } 10 | ] 11 | -------------------------------------------------------------------------------- /.github/workflows/docker-ci.yml: -------------------------------------------------------------------------------- 1 | name: Docker CI 2 | 3 | on: 4 | push: 5 | paths: 6 | - ".github/workflows/docker-ci.yml" 7 | pull_request: 8 | branches: 9 | - main 10 | 11 | jobs: 12 | docker: 13 | name: Docker compose up 14 | runs-on: ubuntu-latest 15 | steps: 16 | - uses: actions/checkout@v4 17 | - name: Docker compose up 18 | run: docker compose up -d 19 | -------------------------------------------------------------------------------- /backend/rust_parser/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "rust_parser" 3 | version = "1.0.0" 4 | edition = "2021" 5 | 6 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 7 | 8 | [dependencies] 9 | anyhow = "1.0.75" 10 | pretty_assertions = "1.4.0" 11 | regex = "1.9.6" 12 | serde = { version = "1.0.188", features = ["derive"] } 13 | serde_json = "1.0.107" 14 | tl = "0.7.7" 15 | -------------------------------------------------------------------------------- /backend/vector_store/.sqlx/query-63012c5ed8ef1bd8c3c931294767c96a5eae22e960264ab1a6b4b9e957c3c20c.json: -------------------------------------------------------------------------------- 1 | { 2 | "db_name": "PostgreSQL", 3 | "query": "DELETE FROM 
course_coordinator WHERE course_id = $1", 4 | "describe": { 5 | "columns": [], 6 | "parameters": { 7 | "Left": [ 8 | "Text" 9 | ] 10 | }, 11 | "nullable": [] 12 | }, 13 | "hash": "63012c5ed8ef1bd8c3c931294767c96a5eae22e960264ab1a6b4b9e957c3c20c" 14 | } 15 | -------------------------------------------------------------------------------- /frontend/eslint.config.js: -------------------------------------------------------------------------------- 1 | import globals from "globals"; 2 | import pluginJs from "@eslint/js"; 3 | import tseslint from "typescript-eslint"; 4 | 5 | export default [ 6 | { files: ["**/*.{js,mjs,cjs,ts}"] }, 7 | { 8 | ignores: [".svelte-kit", "node_modules", "dist"], 9 | }, 10 | { languageOptions: { globals: { ...globals.browser, ...globals.node } } }, 11 | pluginJs.configs.recommended, 12 | ...tseslint.configs.recommended, 13 | ]; 14 | -------------------------------------------------------------------------------- /backend/db-manager/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM rust_parser AS PARSER 2 | 3 | FROM clojure:temurin-20-lein-alpine AS BUILD 4 | 5 | 6 | COPY . 
/code 7 | 8 | WORKDIR /code 9 | 10 | RUN lein uberjar 11 | 12 | FROM eclipse-temurin:20-alpine 13 | 14 | WORKDIR /app 15 | 16 | COPY --from=PARSER /app/target/x86_64-unknown-linux-musl/release/rust_parser /usr/local/bin/rust_parser 17 | 18 | COPY --from=BUILD /code/target/uberjar/*-standalone.jar ./app.jar 19 | 20 | CMD ["java", "-jar", "app.jar", "-f"] 21 | -------------------------------------------------------------------------------- /backend/vector_store/.sqlx/query-e1166c8c3bde787b1b07787d9a30ec58575597e8223d57d8d02e43944b7d5cae.json: -------------------------------------------------------------------------------- 1 | { 2 | "db_name": "PostgreSQL", 3 | "query": "INSERT INTO course_coordinator (course_id, email) VALUES ($1, $2)", 4 | "describe": { 5 | "columns": [], 6 | "parameters": { 7 | "Left": [ 8 | "Text", 9 | "Text" 10 | ] 11 | }, 12 | "nullable": [] 13 | }, 14 | "hash": "e1166c8c3bde787b1b07787d9a30ec58575597e8223d57d8d02e43944b7d5cae" 15 | } 16 | -------------------------------------------------------------------------------- /frontend/src/components/SideCard.svelte: -------------------------------------------------------------------------------- 1 | 9 | 10 | 11 |
12 |

{heading}

13 |
14 | {@render children?.()} 15 |
16 |
17 | -------------------------------------------------------------------------------- /backend/vector_store/.sqlx/query-65f1c255cf0c5df5476c730aea2a9448783ca002a83452a3f245a82b79371652.json: -------------------------------------------------------------------------------- 1 | { 2 | "db_name": "PostgreSQL", 3 | "query": "INSERT INTO coordinator (email, full_name) VALUES ($1, $2)\n ON CONFLICT(email) DO NOTHING", 4 | "describe": { 5 | "columns": [], 6 | "parameters": { 7 | "Left": [ 8 | "Text", 9 | "Text" 10 | ] 11 | }, 12 | "nullable": [] 13 | }, 14 | "hash": "65f1c255cf0c5df5476c730aea2a9448783ca002a83452a3f245a82b79371652" 15 | } 16 | -------------------------------------------------------------------------------- /backend/db-manager/src/db_manager/cache.clj: -------------------------------------------------------------------------------- 1 | (ns db-manager.cache) 2 | 3 | (defn cache 4 | "This simply caches the result of a function call. It is used for memoizing the 5 | results of a data-source (for instance, a database query)." 6 | [hashable data-source] 7 | (let [cache-atom (atom {}) 8 | hashed (hash hashable) 9 | result (get @cache-atom hashed)] 10 | (if result 11 | result 12 | (let [result (data-source hashable)] 13 | (swap! cache-atom assoc hashed result) 14 | result)))) 15 | -------------------------------------------------------------------------------- /backend/vector_store/Dockerfile: -------------------------------------------------------------------------------- 1 | # BUILDER 2 | FROM rust:bookworm as builder 3 | 4 | WORKDIR /app 5 | COPY . . 
6 | 7 | RUN apt-get update 8 | RUN apt-get install -y libssl-dev pkg-config openssl g++ 9 | 10 | RUN cargo build --release 11 | 12 | # FINAL IMAGE 13 | FROM debian:bookworm-slim 14 | WORKDIR /app 15 | COPY --from=builder /app/target/release/vector_store ./ 16 | 17 | RUN apt-get update 18 | RUN apt-get install -y libssl3 ca-certificates 19 | 20 | ENV SERVER_ADDRESS=0.0.0.0 21 | ENV SERVER_PORT=4000 22 | ENV DATA_DIR="../data/" 23 | CMD ["./vector_store"] 24 | -------------------------------------------------------------------------------- /backend/vector_store/.sqlx/query-7060a1b2757a7c942deadc8b86ccc6ea26bbfb5e24db6dfbdea16ebb88c79319.json: -------------------------------------------------------------------------------- 1 | { 2 | "db_name": "PostgreSQL", 3 | "query": "INSERT INTO course (id, title, content) VALUES ($1, $2, $3)\n ON CONFLICT(id) DO UPDATE SET title = $2, content = $3, last_modified = CURRENT_TIMESTAMP", 4 | "describe": { 5 | "columns": [], 6 | "parameters": { 7 | "Left": [ 8 | "Text", 9 | "Text", 10 | "Text" 11 | ] 12 | }, 13 | "nullable": [] 14 | }, 15 | "hash": "7060a1b2757a7c942deadc8b86ccc6ea26bbfb5e24db6dfbdea16ebb88c79319" 16 | } 17 | -------------------------------------------------------------------------------- /frontend/src/assets/CloseCross.svelte: -------------------------------------------------------------------------------- 1 | 10 | 11 | 18 | 22 | 23 | -------------------------------------------------------------------------------- /backend/db-manager/test/db_manager/core_test.clj: -------------------------------------------------------------------------------- 1 | (ns db-manager.core-test 2 | (:require [clojure.test :refer :all] 3 | [db-manager.core :refer :all])) 4 | 5 | (deftest merge-test 6 | (let [test-employees [{:email "foo@bar.dk" :name "Erik" :title "CEO"} 7 | {:email "foo@bar.dk" :name "Erik" :title "CTO"} 8 | {:email "josh@jniemela.dk" :name "Josh" :title "Developer"}]] 9 | (is (= (merge-employees test-employees) 10 | 
[{:email "foo@bar.dk" :name "Erik" :title "CEO, CTO"} 11 | {:email "josh@jniemela.dk" :name "Josh" :title "Developer"}])))) 12 | 13 | -------------------------------------------------------------------------------- /.github/workflows/deploy.yml: -------------------------------------------------------------------------------- 1 | name: Deploy to server 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | 8 | jobs: 9 | deploy: 10 | name: Deploy to server 11 | runs-on: ubuntu-latest 12 | 13 | steps: 14 | - name: Run SSH commands 15 | uses: appleboy/ssh-action@v1.0.3 16 | with: 17 | host: ${{ secrets.HOST }} 18 | username: ${{ secrets.USERNAME }} 19 | key: ${{ secrets.SSH_KEY }} 20 | port: 22 21 | script: | 22 | cd /dockers/KU-courses 23 | git fetch 24 | git reset --hard origin/main 25 | docker compose up -d --build 26 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/pre-commit/pre-commit-hooks 3 | rev: v2.3.0 4 | hooks: 5 | - id: check-yaml 6 | - id: end-of-file-fixer 7 | - id: trailing-whitespace 8 | - repo: https://github.com/psf/black 9 | rev: 22.10.0 10 | hooks: 11 | - id: black 12 | - repo: https://github.com/pre-commit/mirrors-prettier 13 | rev: v3.0.0-alpha.9-for-vscode 14 | hooks: 15 | - id: prettier 16 | args: [--config, frontend/prettierrc.json, --write] # edit files in-place 17 | additional_dependencies: 18 | - prettier 19 | - prettier-plugin-svelte 20 | - svelte 21 | -------------------------------------------------------------------------------- /frontend/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "extends": "./.svelte-kit/tsconfig.json", 3 | "compilerOptions": { 4 | "allowJs": true, 5 | "checkJs": true, 6 | "esModuleInterop": true, 7 | "forceConsistentCasingInFileNames": true, 8 | "resolveJsonModule": true, 9 | 
"skipLibCheck": true, 10 | "sourceMap": true, 11 | "strict": true 12 | } 13 | // Path aliases are handled by https://kit.svelte.dev/docs/configuration#alias 14 | // 15 | // If you want to overwrite includes/excludes, make sure to copy over the relevant includes/excludes 16 | // from the referenced tsconfig.json - TypeScript does not merge them in 17 | } 18 | -------------------------------------------------------------------------------- /frontend/src/assets/Gb.svelte: -------------------------------------------------------------------------------- 1 | 6 | 7 | 11 | 15 | 16 | 17 | 18 | -------------------------------------------------------------------------------- /backend/vector_store/.sqlx/query-1bac47c3b9f3a187ee440c83f2c22106f0bfdb8d7865eec2d49a0585da7b21ca.json: -------------------------------------------------------------------------------- 1 | { 2 | "db_name": "PostgreSQL", 3 | "query": "SELECT title, content FROM course WHERE id = $1", 4 | "describe": { 5 | "columns": [ 6 | { 7 | "ordinal": 0, 8 | "name": "title", 9 | "type_info": "Text" 10 | }, 11 | { 12 | "ordinal": 1, 13 | "name": "content", 14 | "type_info": "Text" 15 | } 16 | ], 17 | "parameters": { 18 | "Left": [ 19 | "Text" 20 | ] 21 | }, 22 | "nullable": [ 23 | false, 24 | false 25 | ] 26 | }, 27 | "hash": "1bac47c3b9f3a187ee440c83f2c22106f0bfdb8d7865eec2d49a0585da7b21ca" 28 | } 29 | -------------------------------------------------------------------------------- /.github/workflows/rust-ci.yml: -------------------------------------------------------------------------------- 1 | name: Rust CI 2 | 3 | on: 4 | push: 5 | paths: 6 | - "backend/rust_parser/**" 7 | - "backend/vector_store/**" 8 | - ".github/workflows/rust-ci.yml" 9 | pull_request: 10 | branches: 11 | - main 12 | 13 | env: 14 | CARGO_TERM_COLOR: always 15 | RUSTFLAGS: "-Dwarnings" 16 | 17 | jobs: 18 | prettier: 19 | name: Rust quality check 20 | runs-on: ubuntu-latest 21 | strategy: 22 | matrix: 23 | service: [rust_parser, vector_store] 24 
| steps: 25 | - uses: actions/checkout@v4 26 | - name: Run clippy on ${{ matrix.service }} 27 | working-directory: backend/${{ matrix.service }} 28 | run: cargo clippy --all-targets --all-features 29 | -------------------------------------------------------------------------------- /frontend/src/components/Changelog/store.ts: -------------------------------------------------------------------------------- 1 | import { writable } from "svelte/store"; 2 | import { browser } from "$app/environment"; 3 | 4 | function modalStoreFunctions() { 5 | const { subscribe, set } = writable(false); 6 | 7 | return { 8 | subscribe, 9 | open: () => { 10 | console.log(document.body.scrollTop); 11 | set(true); 12 | if (browser) { 13 | document.body.classList.add("modal-open"); 14 | } 15 | }, 16 | close: () => { 17 | set(false); 18 | if (browser) { 19 | document.body.classList.remove("modal-open"); 20 | } 21 | }, 22 | }; 23 | } 24 | 25 | export const modalStore = modalStoreFunctions(); 26 | -------------------------------------------------------------------------------- /backend/db-manager/README.md: -------------------------------------------------------------------------------- 1 | # db-manager 2 | 3 | This module is responsible for interacting with the backend and the PostgreSQL database, this includes creating the tables, destroying the tables, and querying for various things in the database. 4 | 5 | ## Installation 6 | 7 | Install leiningen and run `lein deps` to install dependencies 8 | 9 | ## Usage 10 | 11 | FIXME: explanation 12 | 13 | $ java -jar db-manager-0.1.0-standalone.jar [args] 14 | 15 | ## Options 16 | 17 | FIXME: listing of options this app accepts. 18 | 19 | ## Examples 20 | 21 | ... 22 | 23 | ### Bugs 24 | 25 | ... 
26 | 27 | ### Any Other Sections 28 | ### That You Think 29 | ### Might be Useful 30 | 31 | ## License 32 | 33 | Owned by Josh Niemelä, MIT is in effect (MIT license document will be added later) 34 | -------------------------------------------------------------------------------- /backend/vector_store/.sqlx/query-07ff355f2f2a14bf0a163f3cb6450f57baa285962f0e9fefe0c8d6a5084e7d0a.json: -------------------------------------------------------------------------------- 1 | { 2 | "db_name": "PostgreSQL", 3 | "query": "SELECT c.id\n FROM course c\n LEFT JOIN title_embedding te ON c.id = te.course_id\n LEFT JOIN content_embedding ce ON c.id = ce.course_id\n WHERE\n c.last_modified > COALESCE(te.last_modified, to_timestamp(0)) OR\n c.last_modified > COALESCE(ce.last_modified, to_timestamp(0))", 4 | "describe": { 5 | "columns": [ 6 | { 7 | "ordinal": 0, 8 | "name": "id", 9 | "type_info": "Text" 10 | } 11 | ], 12 | "parameters": { 13 | "Left": [] 14 | }, 15 | "nullable": [ 16 | true 17 | ] 18 | }, 19 | "hash": "07ff355f2f2a14bf0a163f3cb6450f57baa285962f0e9fefe0c8d6a5084e7d0a" 20 | } 21 | -------------------------------------------------------------------------------- /frontend/src/assets/MenuIcon.svelte: -------------------------------------------------------------------------------- 1 | 8 | 12 | 16 | 20 | 21 | -------------------------------------------------------------------------------- /backend/vector_store/.sqlx/query-b67fb1612df114fa58878cc079a5c912765be487fb0d98985d68e6f2dcf4f885.json: -------------------------------------------------------------------------------- 1 | { 2 | "db_name": "PostgreSQL", 3 | "query": "SELECT id, title, content FROM course WHERE id = ANY($1)", 4 | "describe": { 5 | "columns": [ 6 | { 7 | "ordinal": 0, 8 | "name": "id", 9 | "type_info": "Text" 10 | }, 11 | { 12 | "ordinal": 1, 13 | "name": "title", 14 | "type_info": "Text" 15 | }, 16 | { 17 | "ordinal": 2, 18 | "name": "content", 19 | "type_info": "Text" 20 | } 21 | ], 22 | "parameters": { 23 
| "Left": [ 24 | "TextArray" 25 | ] 26 | }, 27 | "nullable": [ 28 | false, 29 | false, 30 | false 31 | ] 32 | }, 33 | "hash": "b67fb1612df114fa58878cc079a5c912765be487fb0d98985d68e6f2dcf4f885" 34 | } 35 | -------------------------------------------------------------------------------- /backend/db-manager/CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Change Log 2 | All notable changes to this project will be documented in this file. This change log follows the conventions of [keepachangelog.com](http://keepachangelog.com/). 3 | 4 | ## [Unreleased] 5 | ### Changed 6 | - Add a new arity to `make-widget-async` to provide a different widget shape. 7 | 8 | ## [0.1.1] - 2023-05-23 9 | ### Changed 10 | - Documentation on how to make the widgets. 11 | 12 | ### Removed 13 | - `make-widget-sync` - we're all async, all the time. 14 | 15 | ### Fixed 16 | - Fixed widget maker to keep working when daylight savings switches over. 17 | 18 | ## 0.1.0 - 2023-05-23 19 | ### Added 20 | - Files from the new template. 21 | - Widget maker public API - `make-widget-sync`. 22 | 23 | [Unreleased]: https://sourcehost.site/your-name/db-manager/compare/0.1.1...HEAD 24 | [0.1.1]: https://sourcehost.site/your-name/db-manager/compare/0.1.0...0.1.1 25 | -------------------------------------------------------------------------------- /frontend/src/app.css: -------------------------------------------------------------------------------- 1 | @tailwind base; 2 | @tailwind components; 3 | @tailwind utilities; 4 | 5 | /* FIXME: why does this need to be in app.css? 
*/ 6 | body.modal-open { 7 | position: fixed; 8 | } 9 | 10 | html { 11 | overflow-y: scroll; 12 | } 13 | 14 | @layer utilities { 15 | input[type="checkbox"] { 16 | @apply appearance-none w-4 h-4 border-2 border-kuGray rounded-none bg-white checked:bg-kuRed; 17 | } 18 | } 19 | 20 | @layer components { 21 | input[type="checkbox"]::after { 22 | content: ""; 23 | position: absolute; 24 | display: none; 25 | top: 9px; 26 | left: 6px; 27 | width: 4px; 28 | height: 8px; 29 | border: solid white; 30 | border-width: 0 2px 2px 0; 31 | transform: rotate(45deg); 32 | } 33 | 34 | input[type="checkbox"]:checked::after { 35 | display: block; 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /.github/workflows/frontend-ci.yml: -------------------------------------------------------------------------------- 1 | name: Frontend CI 2 | 3 | on: 4 | push: 5 | paths: 6 | - "frontend/**" 7 | - ".github/workflows/frontend-ci.yml" 8 | pull_request: 9 | branches: 10 | - main 11 | 12 | jobs: 13 | prettier: 14 | name: Code quality check 15 | runs-on: ubuntu-latest 16 | strategy: 17 | matrix: 18 | node-version: [18, 20, 22] 19 | steps: 20 | - uses: actions/checkout@v4 21 | - uses: actions/setup-node@v4 22 | with: 23 | node-version: ${{ matrix.node-version }} 24 | cache: "npm" 25 | cache-dependency-path: ./frontend/package-lock.json 26 | 27 | - name: Clean install 28 | working-directory: ./frontend 29 | run: npm ci 30 | 31 | - name: Prettier & Eslint 32 | working-directory: ./frontend 33 | run: npm run lint 34 | 35 | - name: Svelte check 36 | working-directory: ./frontend 37 | run: npm run check 38 | -------------------------------------------------------------------------------- /backend/vector_store/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "vector_store" 3 | version = "1.0.0" 4 | edition = "2021" 5 | 6 | # See more keys and their definitions at 
https://doc.rust-lang.org/cargo/reference/manifest.html 7 | 8 | [dependencies] 9 | acap = "0.3.0" 10 | anyhow = "1.0.77" 11 | axum = "0.7.3" 12 | bincode = "1.3.3" 13 | criterion = "0.5.1" 14 | fastembed = "4.9.1" 15 | nanohtml2text = "0.1.4" 16 | ndarray = "0.15.6" 17 | ort = "2.0.0-alpha.4" 18 | rayon = "1.8.0" 19 | serde = "1.0.193" 20 | serde_json = "1.0.108" 21 | tokenizers = "0.15.0" 22 | tokio = {version = "1.32.0", features = ["full"]} 23 | sqlx = { version = "0.7", features = ["runtime-tokio", "postgres", "macros", "json", "time"] } 24 | openssl = { version = "0.10.64", features = ["vendored"] } 25 | pgvector = { version = "0.3", features = ["sqlx"] } 26 | lazy_static = "1.5.0" 27 | async-stream = "0.3.5" 28 | futures-util = "0.3.30" 29 | futures-core = "0.3.30" 30 | #lto = "fat" 31 | #codegen-units = 1 32 | #panic = "abort" 33 | #debug=true 34 | -------------------------------------------------------------------------------- /frontend/src/routes/+layout.svelte: -------------------------------------------------------------------------------- 1 | 21 | 22 | 23 | 29 | 30 | 31 |
32 |
33 | {@render children?.()} 34 |
35 |
36 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2023 Joshua Niemelä 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 4 | 5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 6 | 7 | THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 8 | -------------------------------------------------------------------------------- /frontend/README.md: -------------------------------------------------------------------------------- 1 | # create-svelte 2 | 3 | Everything you need to build a Svelte project, powered by [`create-svelte`](https://github.com/sveltejs/kit/tree/master/packages/create-svelte). 4 | 5 | ## Creating a project 6 | 7 | If you're seeing this, you've probably already done this step. Congrats! 
8 | 9 | ```bash 10 | # create a new project in the current directory 11 | npm create svelte@latest 12 | 13 | # create a new project in my-app 14 | npm create svelte@latest my-app 15 | ``` 16 | 17 | ## Developing 18 | 19 | Once you've created a project and installed dependencies with `npm install` (or `pnpm install` or `yarn`), start a development server: 20 | 21 | ```bash 22 | npm run dev 23 | 24 | # or start the server and open the app in a new browser tab 25 | npm run dev -- --open 26 | ``` 27 | 28 | ## Building 29 | 30 | To create a production version of your app: 31 | 32 | ```bash 33 | npm run build 34 | ``` 35 | 36 | You can preview the production build with `npm run preview`. 37 | 38 | > To deploy your app, you may need to install an [adapter](https://kit.svelte.dev/docs/adapters) for your target environment. 39 | -------------------------------------------------------------------------------- /backend/db-manager/project.clj: -------------------------------------------------------------------------------- 1 | (defproject db-manager "1.1.0" 2 | :description "" 3 | :url "https://github.com/joshniemela/disproject" 4 | :license {:name "" 5 | :url ""} 6 | :dependencies [[org.clojure/clojure "1.11.1"] 7 | [org.clojure/data.json "2.4.0"] 8 | [org.jsoup/jsoup "1.16.1"] 9 | [http-kit "2.3.0"] 10 | [ring "1.10.0"] 11 | [ring/ring-codec "1.2.0"] 12 | [metosin/reitit "0.6.0"] 13 | [metosin/muuntaja "0.6.8"] 14 | [metosin/reitit-swagger-ui "0.7.0-alpha4"] 15 | [org.clojure/tools.cli "1.0.214"] 16 | [ring-cors "0.1.13"] 17 | [io.staticweb/rate-limit "1.1.0"] 18 | [clj-http "3.12.3"] 19 | [datascript "1.5.3"] 20 | [technology.tabula/tabula "1.0.5"]] 21 | :main ^:skip-aot db-manager.core 22 | :resource-path "resources" 23 | :target-path "target/%s" 24 | :profiles {:uberjar {:aot :all 25 | :jvm-opts ["-Dclojure.compiler.direct-linking=true"]}}) 26 | -------------------------------------------------------------------------------- /flake.nix: 
-------------------------------------------------------------------------------- 1 | 2 | 3 | { 4 | description = "A basic Rust devshell for NixOS users developing Leptos"; 5 | 6 | inputs = { 7 | nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable"; 8 | flake-utils.url = "github:numtide/flake-utils"; 9 | rust-overlay.url = "github:oxalica/rust-overlay"; 10 | }; 11 | 12 | outputs = { 13 | self, 14 | nixpkgs, 15 | flake-utils, 16 | rust-overlay, 17 | ... 18 | }: 19 | flake-utils.lib.eachDefaultSystem ( 20 | system: let 21 | overlays = [(import rust-overlay) ]; 22 | pkgs = import nixpkgs { 23 | inherit system overlays; 24 | }; 25 | in 26 | with pkgs; { 27 | devShells.default = mkShell { 28 | buildInputs = [ 29 | docker 30 | docker-compose 31 | #git 32 | #nodejs 33 | #leiningen 34 | #pipenv 35 | #python311 36 | 37 | ## browsers for testing 38 | #firefox 39 | #chromium 40 | #pkg-config 41 | #rust-bin.stable.latest.default 42 | ]; 43 | 44 | DATABASE_URL="postgres://postgres:password123@localhost:5432/disku"; 45 | 46 | }; 47 | } 48 | ); 49 | } 50 | -------------------------------------------------------------------------------- /frontend/src/assets/Github.svelte: -------------------------------------------------------------------------------- 1 | 9 | 10 | 22 | -------------------------------------------------------------------------------- /backend/vector_store/README.md: -------------------------------------------------------------------------------- 1 | # Vector Search API 2 | 3 | This service is a simple API that allows you to search for the most relevant courses in 4 | the course catalogue using a query string. 5 | The sentence embedding model, `all-MiniLM-L12-v2` is used to embed the course descriptions, coordinator names, and course titles into vectors that can be very rapidly queried for similarity. 
6 | 7 | # Features 8 | * Fast search for relevant courses 9 | * Automatic asynchronous embedding of course and coordinator data 10 | * Automatic insertion of new courses into the database 11 | 12 | # Requirements 13 | * Rust 14 | * docker 15 | * docker-compose (optional) 16 | * A connection to a PostgreSQL database containing the course catalogue and with the [pgvector](https://github.com/pgvector/pgvector) extension installed. 17 | 18 | # Deployment / Installation 19 | * The service can be deployed with the docker-compose file in the root of the repository. 20 | * It can be built using `docker build .` and manually be run if desired. 21 | 22 | ## Usage 23 | * The API accepts a POST request with a JSON body containing a single key, 'query', with the search query as a string. 24 | 25 | Example: 26 | ```json 27 | { 28 | "query": "Machine Learning" 29 | } 30 | ``` 31 | 32 | The API will return a JSON response with the key 'results' containing a list of the 10 most relevant courses in the course catalogue. 
33 | 34 | 35 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | services: 2 | frontend: 3 | build: 4 | context: ./frontend 5 | 6 | restart: always 7 | ports: 8 | - "5000:5000" 9 | networks: 10 | - dis-network 11 | depends_on: 12 | - server 13 | 14 | server: 15 | build: ./backend/db-manager 16 | restart: always 17 | container_name: dbmanager 18 | ports: 19 | - "3000:3000" 20 | volumes: 21 | - ./data:/data 22 | 23 | networks: 24 | - dis-network 25 | depends_on: 26 | rust_builder: 27 | condition: service_completed_successfully 28 | 29 | 30 | vector_store: 31 | build: ./backend/vector_store 32 | container_name: vectorstore 33 | restart: on-failure 34 | ports: 35 | - "4000:4000" 36 | 37 | volumes: 38 | - ./data:/data 39 | 40 | environment: 41 | POSTGRES_URL: postgres://postgres:password123@postgres/disku 42 | 43 | networks: 44 | - dis-network 45 | 46 | rust_builder: 47 | build: ./backend/rust_parser 48 | image: rust_parser 49 | 50 | postgres: 51 | image: pgvector/pgvector:pg16 52 | ports: 53 | - "5432:5432" 54 | volumes: 55 | - ./data/postgres:/var/lib/postgresql/data 56 | environment: 57 | POSTGRES_USER: postgres 58 | POSTGRES_PASSWORD: password123 59 | POSTGRES_HOST: postgres 60 | POSTGRES_DB: disku 61 | 62 | networks: 63 | - dis-network 64 | 65 | networks: 66 | dis-network: 67 | -------------------------------------------------------------------------------- /backend/vector_store/migrations/0001_initial.sql: -------------------------------------------------------------------------------- 1 | -- Initial database migration for the vector search 2 | BEGIN; 3 | 4 | CREATE TABLE IF NOT EXISTS course ( 5 | id text PRIMARY KEY, 6 | title text NOT NULL, 7 | content text NOT NULL, 8 | last_modified timestamp DEFAULT CURRENT_TIMESTAMP NOT NULL 9 | ); 10 | 11 | CREATE TABLE IF NOT EXISTS coordinator ( 12 | email text PRIMARY KEY, 13 | 
full_name text NOT NULL 14 | ); 15 | 16 | CREATE TABLE IF NOT EXISTS course_coordinator ( 17 | course_id text, 18 | email text, 19 | PRIMARY KEY (course_id, email), 20 | FOREIGN KEY (course_id) REFERENCES course(id), 21 | FOREIGN KEY (email) REFERENCES coordinator(email) 22 | ); 23 | 24 | CREATE EXTENSION IF NOT EXISTS vector; 25 | 26 | CREATE TABLE IF NOT EXISTS name_embedding ( 27 | email text PRIMARY KEY, 28 | embedding vector(384) NOT NULL, 29 | FOREIGN KEY (email) REFERENCES coordinator(email) 30 | ); 31 | 32 | CREATE TABLE IF NOT EXISTS title_embedding ( 33 | course_id text PRIMARY KEY, 34 | embedding vector(384) NOT NULL, 35 | last_modified timestamp DEFAULT CURRENT_TIMESTAMP NOT NULL, 36 | FOREIGN KEY (course_id) REFERENCES course(id) 37 | ); 38 | 39 | CREATE TABLE IF NOT EXISTS content_embedding ( 40 | course_id text PRIMARY KEY, 41 | embedding vector(384) NOT NULL, 42 | last_modified timestamp DEFAULT CURRENT_TIMESTAMP NOT NULL, 43 | FOREIGN KEY (course_id) REFERENCES course(id) 44 | ); 45 | 46 | COMMIT; 47 | -------------------------------------------------------------------------------- /frontend/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "frontend", 3 | "version": "1.0.0", 4 | "private": true, 5 | "scripts": { 6 | "dev": "vite dev", 7 | "build": "vite build", 8 | "preview": "vite preview", 9 | "check": "svelte-kit sync && svelte-check --tsconfig ./tsconfig.json", 10 | "check:watch": "svelte-kit sync && svelte-check --tsconfig ./tsconfig.json --watch", 11 | "lint": "prettier --check . && eslint ." 
12 | }, 13 | "devDependencies": { 14 | "@eslint/js": "^9.9.0", 15 | "@sveltejs/adapter-node": "^2.0.0", 16 | "@sveltejs/kit": "^2.5.27", 17 | "@sveltejs/vite-plugin-svelte": "^4.0.0", 18 | "@types/node": "^20.4.9", 19 | "autoprefixer": "^10.4.14", 20 | "eslint": "^9.9.0", 21 | "globals": "^15.9.0", 22 | "postcss": "^8.4.31", 23 | "prettier": "^3.3.3", 24 | "svelte": "^5.0.0", 25 | "svelte-check": "^4.0.0", 26 | "tailwindcss": "^3.3.2", 27 | "tslib": "^2.4.1", 28 | "typescript": "^5.5.0", 29 | "typescript-eslint": "^8.1.0", 30 | "vite": "^5.4.4" 31 | }, 32 | "type": "module", 33 | "dependencies": { 34 | "@popperjs/core": "^2.11.8", 35 | "chart.js": "^4.3.3", 36 | "chartjs-plugin-datalabels": "^2.2.0", 37 | "lorem-ipsum": "^2.0.8", 38 | "postgres": "^3.3.4", 39 | "prettier-plugin-svelte": "^3.2.6", 40 | "tailwind-merge": "^1.13.2" 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /frontend/src/components/CheckboxMenu.svelte: -------------------------------------------------------------------------------- 1 | 13 | 14 |
15 |
16 |

{header_name}

17 |
18 | 19 |
20 | 26 | {#each options as option} 27 | 28 | 42 | {/each} 43 |
44 |
45 | -------------------------------------------------------------------------------- /backend/rust_parser/src/parser/workload_information.rs: -------------------------------------------------------------------------------- 1 | use crate::parser::{Workload, WorkloadType}; 2 | use anyhow::{ensure, Context, Result}; 3 | 4 | use tl::VDom; 5 | 6 | pub fn parse_workloads(dom: &VDom) -> Result> { 7 | let parser = dom.parser(); 8 | let workload_table = dom 9 | .get_element_by_id("course-load") 10 | .context("Unable to find workload table")? 11 | .get(parser) 12 | .unwrap() 13 | .as_tag() 14 | .unwrap(); 15 | 16 | let mut lis = workload_table 17 | .query_selector(parser, "li") 18 | .context("Unable to find any workload information")? 19 | .map(|node| node.get(parser).unwrap().inner_text(parser)) 20 | .skip(2) 21 | .peekable(); 22 | 23 | let mut workloads = Vec::new(); 24 | // take two at a time from lis 25 | // first is the workload type 26 | // second is the workload value 27 | let mut pair: Vec = Vec::new(); 28 | while lis.peek().is_some() { 29 | if pair.len() == 2 { 30 | let workload = Workload { 31 | workload_type: WorkloadType::from_str(&pair[0])?, 32 | hours: pair[1] 33 | .replace(',', ".") 34 | .parse::() 35 | .context(format!("Unable to parse workload hours: {}", pair[1]))?, 36 | }; 37 | workloads.push(workload); 38 | pair.clear(); 39 | } 40 | pair.push(lis.next().unwrap().to_string()); 41 | } 42 | 43 | ensure!( 44 | !workloads.is_empty(), 45 | "Unable to parse workload information" 46 | ); 47 | Ok(workloads) 48 | } 49 | -------------------------------------------------------------------------------- /backend/rust_parser/src/parser/content_serialiser.rs: -------------------------------------------------------------------------------- 1 | use crate::parser::Description; 2 | 3 | use tl::VDom; 4 | 5 | use anyhow::{Context, Result}; 6 | 7 | // grab some specific htmls and return the html 8 | pub fn grab_htmls(dom: &VDom) -> Result { 9 | let parser = dom.parser(); 10 | 
let content_html = dom 11 | .get_element_by_id("course-content") 12 | .context("Unable to find course content")? 13 | .get(parser) 14 | .context("Unable to grab parser for the dom, this should not happen")?; 15 | 16 | let learning_outcome_html = dom 17 | .get_element_by_id("course-description") 18 | .context("Unable to find learning outcomes")? 19 | .get(parser) 20 | .context("Unable to grab parser for the dom, this should not happen")? 21 | .inner_html(parser); 22 | 23 | // Handle that recommended qualifications might be none 24 | let recommended_qualifications_html = dom 25 | .get_element_by_id("course-skills") // this might be none 26 | .and_then(|elem| { 27 | Some( 28 | elem.get(parser) 29 | .context("Unable to grab parser for the dom, this should not happen") 30 | .ok()? 31 | .inner_html(parser), 32 | ) 33 | }); 34 | 35 | let recommended_qualifications_html = 36 | recommended_qualifications_html.filter(|s| !(s.contains("Ingen") || s.contains("None"))); 37 | 38 | // grab the first 300 chars of the content 39 | let summary = content_html 40 | .inner_text(parser) 41 | .chars() 42 | .take(300) 43 | .collect::(); 44 | 45 | Ok(Description { 46 | content: content_html.inner_html(parser).to_string(), 47 | learning_outcome: learning_outcome_html.to_string(), 48 | recommended_qualifications: recommended_qualifications_html.map(|s| s.to_string()), 49 | summary, 50 | }) 51 | } 52 | -------------------------------------------------------------------------------- /frontend/src/routes/sitemap.xml/+server.ts: -------------------------------------------------------------------------------- 1 | import { dev } from "$app/environment"; 2 | 3 | const url = dev ? 
"http://localhost:3000" : "https://kucourses.dk"; 4 | 5 | function today_yyyy_mm_dd(): string { 6 | const d = new Date(); 7 | const iso = d.toISOString(); 8 | return iso.substring(0, 10); 9 | } 10 | 11 | function generate_xml(course_id: string): string { 12 | return ` 13 | 14 | ${url}/course/${course_id} 15 | 0.8 16 | ${today_yyyy_mm_dd()} 17 | 18 | `; 19 | } 20 | 21 | export async function GET() { 22 | // grab all course-ids from the get-course-ids endpoint which gives a list of json objects 23 | // [{course_id: "course1"}, {course_id: "course2"}] 24 | // 25 | const res = await fetch(`${url}/api/get-all-course-ids`); 26 | const json = await res.json(); 27 | const today = today_yyyy_mm_dd(); 28 | 29 | const course_ids = json.map((x: { course_id: string }) => x.course_id); 30 | 31 | return new Response( 32 | ` 33 | 34 | 42 | 43 | 44 | 45 | ${url} 46 | ${today} 47 | 1.0 48 | 49 | 50 | ${course_ids.map(generate_xml).join("\n")} 51 | 52 | `.trim(), 53 | { 54 | headers: { 55 | "Content-Type": "application/xml", 56 | }, 57 | } 58 | ); 59 | } 60 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing 2 | 3 | All contributions, be it big or small, are encouraged and any help with the project is greatly appreciated. 4 | 5 | ## I want to contribute 6 | ### Boring legal nonsense 7 | By creating a pull request to the project you fully agree that you have the rights to distribute the code in question and that the code, if licensed, can exist under the MIT license. 8 | ### Feature requests 9 | If you have a feature request, please check if it isn't already present in the [backlog](https://github.com/users/joshniemela/projects/5). If it isn't, then either open up an issue or send the request to [Josh Niemelä](mailto:josh@jniemela.dk). 
10 | ### Pull requests 11 | Anything goes, but it is expected that a pull request should solve some particular issue in the active issues, or something in the backlog (make a feature request if your contribution doesn't fit the aforementioned). This pull request is expected to contain code that has been run through its respective formatter (Black for Python, Cljfmt for Clojure, (To come) for TS/Svelte). 12 | 13 | ### Running the project 14 | The project contains a .env file which can be set to development or production, development is the one that should be used. The individual components of the project can be run collectively using `docker-compose` or individually (`lein run` for Clojure, `pipenv run` for Python, `npm run dev` for TS/Svelte). 15 | 16 | ## Bugs 17 | If you've found a bug or something that isn't intuitive in the user interface: 18 | * Ensure this isn't already a known bug by looking at the [issues](https://github.com/joshniemela/disproject/issues). 19 | * Try to replicate the unexpected behaviour. 20 | * Please include the OS, Browser and other useful information in the bug report to make it easier to narrow it down. 21 | * Write an issue about the problem, eventually possible solutions to the problem. 22 | * Lastly, feel welcome to assign yourself to fixing the problem or tagging someone who might be able to fix it. 23 | 24 | -------------------------------------------------------------------------------- /frontend/src/components/BigCheckbox.svelte: -------------------------------------------------------------------------------- 1 | 19 | 20 |
21 | 31 | 32 |
33 | 39 | {#each options as option} 40 | 41 | 55 | {/each} 56 |
57 |
58 | -------------------------------------------------------------------------------- /frontend/src/components/Footer/Footer.svelte: -------------------------------------------------------------------------------- 1 | 14 | 15 | 57 | 58 | 69 | -------------------------------------------------------------------------------- /frontend/src/theme.ts: -------------------------------------------------------------------------------- 1 | const theme = { 2 | colors: { 3 | kuRed: "#901a1e", 4 | kuGray: "#14314f", 5 | darkGray: "#2b2d41", 6 | dulledWhite: "#f4f5f7", 7 | greyedOut: "#8d99ad", 8 | brand: { 9 | 100: "#270102", 10 | 200: "#370002", 11 | 300: "#630307", 12 | 400: "#780D10", 13 | 500: "#901A1E", 14 | 600: "#B84044", 15 | 700: "#D27275", 16 | 800: "#E5A3A5", 17 | 900: "#FCEBEC", 18 | }, 19 | neutral: { 20 | 100: "#03080E", 21 | 200: "#101E2D", 22 | 300: "#1A2A39", 23 | 400: "#273441", 24 | 500: "#3A4550", 25 | 600: "#7B7E81", 26 | 700: "#C2C2C2", 27 | 800: "#F2EFEF", 28 | 900: "#FFFFFF", 29 | }, 30 | green: { 31 | 100: "#013100", 32 | 200: "#026200", 33 | 300: "#049001", 34 | 400: "#0FBC0C", 35 | 500: "#24D921", 36 | 600: "#3EEE3B", 37 | 700: "#71FF6F", 38 | 800: "#88FF86", 39 | 900: "#AEFFAC", 40 | }, 41 | orange: { 42 | 100: "#302300", 43 | 200: "#624600", 44 | 300: "#906801", 45 | 400: "#BC8A0C", 46 | 500: "#D9A521", 47 | 600: "#EEBB3B", 48 | 700: "#FFD66F", 49 | 800: "#FFDD86", 50 | 900: "#FFE8AC", 51 | }, 52 | red: { 53 | 100: "#300000", 54 | 200: "#620000", 55 | 300: "#900101", 56 | 400: "#BC0C0C", 57 | 500: "#D92121", 58 | 600: "#EE3B3B", 59 | 700: "#FF6F6F", 60 | 800: "#FF8686", 61 | 900: "#FFACAC", 62 | }, 63 | blue: { 64 | 100: "#001330", 65 | 200: "#002762", 66 | 300: "#013B90", 67 | 400: "#0C52BC", 68 | 500: "#216AD9", 69 | 600: "#3B82EE", 70 | 700: "#6FA8FF", 71 | 800: "#86B6FF", 72 | 900: "#ACCDFF", 73 | }, 74 | }, 75 | }; 76 | 77 | export default theme; 78 | -------------------------------------------------------------------------------- 
/frontend/src/assets/Facebook.svelte: -------------------------------------------------------------------------------- 1 | 9 | 10 | 18 | 19 | 20 | 21 | 22 | 23 | 27 | 28 | 29 | 30 | 35 | 36 | 37 | 42 | 43 | 44 | 45 | 46 | 47 | -------------------------------------------------------------------------------- /frontend/src/routes/course/[courseId]/+page.server.ts: -------------------------------------------------------------------------------- 1 | import { apiUrl } from "../../../stores"; 2 | import { total_hours } from "../../../course"; 3 | import type { Course, Statistics, Grade } from "../../../course"; 4 | 5 | // ungraded returns absent, total, fail and pass 6 | function transform_ungraded_stats(stats: Statistics) { 7 | return [ 8 | { grade: "Fail", count: stats.fail }, 9 | { grade: "Pass", count: stats.pass }, 10 | { grade: "Absent", count: stats.absent }, 11 | ]; 12 | } 13 | 14 | function transform_graded_stats(stats: Statistics) { 15 | return stats.grades.map((grade) => { 16 | return { grade: grade.grade, count: grade.count }; 17 | }); 18 | } 19 | 20 | function transform_stats(stats: Statistics | null) { 21 | if (stats == null) { 22 | return null; 23 | } else { 24 | if (stats.grades == null) { 25 | return transform_ungraded_stats(stats); 26 | } else { 27 | return transform_graded_stats(stats); 28 | } 29 | } 30 | } 31 | 32 | function null_to_zero(grades: Grade[] | null) { 33 | // in each grade, count pair, if count is null, set it to 0 34 | if (grades == null) { 35 | return undefined; 36 | } 37 | return grades.map((grade: Grade) => { 38 | if (grade.count == null) { 39 | return { grade: grade.grade, count: 0 }; 40 | } else { 41 | return grade; 42 | } 43 | }); 44 | } 45 | 46 | export async function load({ fetch, params }) { 47 | const { courseId } = params; 48 | const API_URL = apiUrl(); 49 | 50 | const res = await fetch( 51 | `${API_URL}/get-detailed-course-info?id=${courseId}`, 52 | { 53 | method: "GET", 54 | headers: { 55 | accept: "application/json", 56 | 
"Content-Type": "application/json", 57 | }, 58 | } 59 | ); 60 | 61 | const course: Course = await res.json(); 62 | const grades = null_to_zero(transform_stats(course.statistics)); 63 | const stats = course.statistics; 64 | if (stats !== null && stats !== undefined) { 65 | stats.grades = grades !== undefined ? grades : []; 66 | } 67 | return { 68 | courseId: courseId, 69 | course: course, 70 | totalHours: total_hours(course), 71 | statistics: stats, 72 | loading: false, 73 | }; 74 | } 75 | -------------------------------------------------------------------------------- /frontend/src/components/Loader/Loader.svelte: -------------------------------------------------------------------------------- 1 | 4 | 5 | 6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 | 20 | 96 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # KU-Courses 2 | ⚠️ Future developement will continue on [Forgejo](https://git.argmin.dk/joshnie/KU-courses) 3 | 4 | ![Example of KU-Courses](frontend/static/assets/preview.png "KU Courses") 5 | 6 | The entire application is governed through the `docker-compose.yml` file and is built with `docker compose`: 7 | 8 | ## Starting the application 9 | 1. Install `docker` and `docker-compose`, this may need a restart of your system since Docker is a very low level program. 10 | 2. Run `docker compose up --build` as either a user with permissions to docker, or with `sudo`/`doas`, the build flag is required if the backend or frontend code has been changed, additionally `-d` will make it detach from the terminal. 11 | a. Add folders `exam_pdfs` and `pages` to the `data` folder. 12 | 4. Wait for the scraper in the backend to complete scraping pages, this may take about 15 minutes. 13 | 5. Run `docker compose restart`, this is required so that the parser will run and so that the vector store can create new embeddings. 14 | 6. ??? 15 | 7. PROFIT!!! 16 | 17 | 18 | ## db-manager 19 | The backend is built with Clojure, a functional programmering language based on Lisp which runs on the Java Virtual Machine. 20 | This part serves multiple purposes, it is responsible for scraping the course pages from KU as well as the statistics from STADS. 21 | The backend also serves the frontend and contains the "datascript" database and is responsible for refreshing and various services occasionally (this feature is partially broken at the moment). 22 | 23 | ## vector_store 24 | This service is responsible for the semantic searches used in the `get_course_overviews` route, instead of using trigrams or full-text, we decided to use vector searches for the lower latency. 
25 | 26 | ## rust_parser 27 | This service is the parser that takes the scraped course pages and parses them into a format we can use in the database for searching and for serving to the frontend. 28 | 29 | ## frontend 30 | Frontend is built in Svelte/Typescript. This is a highly responsible SPA that shows the courses in the form of cards which can be clicked into to get a more detailed view of the course. 31 | 32 | 33 | # Credits 34 | * Thanks to [Jákup Lützen](https://github.com/BinFlush) for creating the original course parser in Python. 35 | * Thanks to [Kristian Pedersen](https://github.com/KristianDamPedersen) for creating the original frontend, and help in designing the architecture and first database schema. 36 | * Thanks to [Zander Bournonville](https://github.com/DrZder) for creating the statistics parser. 37 | -------------------------------------------------------------------------------- /backend/vector_store/src/populate.rs: -------------------------------------------------------------------------------- 1 | use super::{Coordinator, PostgresDB}; 2 | use anyhow::Result; 3 | use nanohtml2text::html2text; 4 | use serde::Deserialize; 5 | use std::fs::File; 6 | use std::io::BufReader; 7 | use std::path::Path; 8 | 9 | #[derive(Deserialize, Clone)] 10 | pub struct Document { 11 | pub title: String, 12 | pub info: Info, 13 | pub description: Description, 14 | pub logistics: Logistics, 15 | } 16 | 17 | #[derive(Deserialize, Clone)] 18 | pub struct Logistics { 19 | pub coordinators: Vec, 20 | } 21 | 22 | #[derive(Deserialize, Clone)] 23 | pub struct Description { 24 | pub content: String, 25 | } 26 | 27 | #[derive(Deserialize, Clone)] 28 | pub struct Info { 29 | pub id: String, 30 | } 31 | 32 | /// Upserts all the documents in the directory into the database 33 | /// This function is used to populate the database 34 | /// TODO: Remove population functionality from this service 35 | pub async fn upsert_documents_from_path(db: &PostgresDB, path: &Path) -> 
Result<()> { 36 | let documents = read_jsons(path)?; 37 | for document in documents { 38 | db.upsert_document(&document).await?; 39 | } 40 | Ok(()) 41 | } 42 | 43 | /// Reads a json file from the path and returns a Document 44 | /// This function also converts the html content to plain text and removes newlines 45 | fn read_json(path: &Path) -> Result { 46 | // TODO: this entire thing is awful, please rewrite 47 | let file = File::open(path)?; 48 | let reader = BufReader::new(file); 49 | let mut doc: Document = serde_json::from_reader(reader)?; 50 | doc.description.content = html2text(&doc.description.content); 51 | doc.description.content = doc.description.content.replace('\n', " "); 52 | doc.description.content = doc.description.content.replace('\t', " "); 53 | doc.description.content = doc.description.content.replace('\r', " "); 54 | Ok(doc) 55 | } 56 | 57 | /// Reads all the jsons in the directory and returns a Vec 58 | /// This function also converts the html content to plain text and removes newlines 59 | /// This function is used to populate the database 60 | /// TODO: Remove population functionality from this service 61 | fn read_jsons(path: &Path) -> Result> { 62 | // this should read all the jsons in the directory 63 | let file_names = std::fs::read_dir(path)?; 64 | let mut documents = Vec::new(); 65 | for file_name in file_names { 66 | let file_name = file_name?; 67 | let path = file_name.path(); 68 | let document = read_json(&path)?; 69 | documents.push(document); 70 | } 71 | Ok(documents) 72 | } 73 | -------------------------------------------------------------------------------- /backend/db-manager/src/exam_scraper/core.clj: -------------------------------------------------------------------------------- 1 | (ns exam-scraper.core 2 | (:require [clojure.java.io :as io] 3 | [clojure.string :as string] 4 | [clojure.set :as set]) 5 | (:import (java.io File) 6 | (org.apache.commons.cli DefaultParser) 7 | (technology.tabula CommandLineApp))) 8 | 9 | (defn 
to-command-line [options] 10 | (let [parser (DefaultParser.) 11 | build-options (CommandLineApp/buildOptions) 12 | args (into-array String options)] 13 | (.parse parser build-options args))) 14 | 15 | (def tabula-options ["-f" "TSV" "-g" "-p" "all"]) 16 | 17 | (defn convert-exam-pdf-to-tsv [pdf-file out-file] 18 | (let [cmd-line (to-command-line tabula-options) 19 | cli-app (CommandLineApp. System/out cmd-line)] 20 | (.extractFileInto cli-app pdf-file out-file))) 21 | 22 | (defn get-itx-courses-from-file [pdf-file] 23 | ; the course code is on the first column, if the second column contains "ITX" anywhere in the row it's an ITX course 24 | ; start by converting to tsv at a temporary location 25 | (let [tsv-file (File/createTempFile "tabula" ".tsv")] 26 | (try 27 | (convert-exam-pdf-to-tsv pdf-file tsv-file) 28 | (let [tsv (slurp tsv-file) 29 | lines (string/split-lines tsv) 30 | itx-courses (filter #(string/includes? % "ITX") lines)] 31 | (println "[exam scraper] Found" (count itx-courses) "ITX courses in" pdf-file) 32 | (map #(first (string/split % #"\t")) itx-courses)) 33 | (catch Exception e 34 | (println "[exam scraper] Failed to extract ITX courses from" pdf-file ":" (.getMessage e)) 35 | (.printStackTrace e) 36 | ; we don't handle this case yet, 37 | ; we just return an empty list and move on 38 | [])))) 39 | 40 | (defn get-itx-courses-from-dir [dir] 41 | (let [pdf-files (drop 1 (file-seq (io/file dir))) 42 | itx-courses (mapcat get-itx-courses-from-file pdf-files)] 43 | (distinct itx-courses))) 44 | 45 | 46 | (defn to-itx [exams-list] 47 | ; exams-list is a vector of maps, each key has a key, if this key is "Written", change it to "ITX") 48 | (map (fn [exam] 49 | (if (map? 
exam) 50 | (set/rename-keys exam {"Written" "ITX"}) 51 | exam)) exams-list)) 52 | 53 | ; I mistankenly thought they were a vector of maps, but they are a vector of maps OR strings 54 | 55 | 56 | ; make a functio nthat only does this for a single course 57 | (defn patch-course-exam [course itx-course-ids] 58 | (let [course-id (get-in course ["info" "id"]) 59 | itx? (some #(= course-id %) itx-course-ids)] 60 | (if itx? 61 | (assoc course "exams" (to-itx (get course "exams"))) 62 | course))) 63 | 64 | (defn patch-courses-w-itx [courses itx-course-ids] 65 | (map #(patch-course-exam % itx-course-ids) courses)) 66 | -------------------------------------------------------------------------------- /backend/db-manager/src/course_scraper/upsert.clj: -------------------------------------------------------------------------------- 1 | 2 | (ns course-scraper.upsert 3 | (:require [clojure.core :as c] 4 | [clojure.data.json :as json] 5 | [clojure.java.io :as io] 6 | [reitit.coercion.spec] 7 | [db-manager.db :refer [course-to-transaction remove-nils]] 8 | [datascript.core :as d]) 9 | (:gen-class)) 10 | 11 | ; https://andersmurphy.com/2022/03/27/clojure-removing-namespace-from-keywords-in-response-middleware.html 12 | (defn transform-keys 13 | [t coll] 14 | (clojure.walk/postwalk (fn [x] (if (map? x) (update-keys x t) x)) coll)) 15 | 16 | (defn remove-namespace-keywords-in-response-middleware [handler & _] 17 | (fn [req] 18 | (let [resp (handler req)] 19 | (cond-> resp 20 | (comp map? :body) (update :body 21 | (partial transform-keys 22 | (comp keyword name))))))) 23 | 24 | (defn try-finding-stats [stats-dir course-id] 25 | (try 26 | ; stats file is in stats-dir 27 | (let [stats-file (str stats-dir course-id ".json")] 28 | (json/read-str (slurp stats-file))) 29 | (catch Exception e 30 | nil))) 31 | 32 | (defn transform-stats [stats] 33 | (when-not (nil? 
(stats "exam")) 34 | (let [exam (stats "exam") 35 | pass-rate (exam "pass-rate") 36 | mean (exam "mean") 37 | median (exam "median") 38 | graded? (exam "graded") 39 | grades (exam "grades") 40 | absent (exam "absent") 41 | fail (exam "fail") 42 | pass (exam "pass") 43 | total (exam "total")] 44 | (if graded? 45 | {:statistics/pass-rate pass-rate 46 | :statistics/absent absent 47 | :statistics/fail fail 48 | :statistics/pass pass 49 | :statistics/total total 50 | :statistics/mean mean 51 | :statistics/median median 52 | :statistics/grades grades} 53 | {:statistics/pass-rate pass-rate 54 | :statistics/pass pass 55 | :statistics/absent absent 56 | :statistics/fail fail 57 | :statistics/total total})))) 58 | 59 | 60 | (defn transactions-w-stats [stats-finder courses] (map (fn [course] 61 | (let [course-id (get-in course ["info" "id"]) 62 | stats (stats-finder course-id) 63 | transacted-course (course-to-transaction course)] 64 | (remove-nils (if stats 65 | (assoc transacted-course :course/statistics (transform-stats stats)) 66 | transacted-course)))) 67 | courses)) 68 | 69 | (defn read-json-file [file-name] 70 | (let [file (slurp file-name)] 71 | (json/read-str file))) 72 | -------------------------------------------------------------------------------- /frontend/src/stores.ts: -------------------------------------------------------------------------------- 1 | import { writable } from "svelte/store"; 2 | import type { Writable } from "svelte/store"; 3 | import { browser } from "$app/environment"; 4 | 5 | const VERSION = "v1.0.0"; 6 | const VERSION_KEY = "version"; 7 | 8 | // Generic store functions 9 | function setSessionStore(key: string, value: T): void { 10 | sessionStorage.setItem(key, JSON.stringify(value)); 11 | } 12 | 13 | function getSessionStore(key: string): T | null { 14 | return JSON.parse(sessionStorage.getItem(key) || "null") as T; 15 | } 16 | 17 | // A generic writable store that persists to sessionStorage 18 | export function writableSession(key: 
string, value: T): Writable { 19 | if (!browser) return writable(value); // Mock for SSR 20 | const storedVersion = sessionStorage.getItem(VERSION_KEY); 21 | 22 | // Cache busting 23 | if (storedVersion !== VERSION) { 24 | sessionStorage.removeItem(key); 25 | sessionStorage.setItem(VERSION_KEY, VERSION); 26 | } 27 | 28 | const sessionValue = getSessionStore(key); 29 | if (!sessionValue) setSessionStore(key, value); 30 | 31 | const store = writable(sessionValue || value); 32 | store.subscribe((value) => { 33 | setSessionStore(key, value); 34 | }); 35 | 36 | return store; 37 | } 38 | // END Generic store functions 39 | 40 | // make a writableSession if we have a browser 41 | const emptyQuery = { 42 | blocks: [], 43 | degrees: [], 44 | schedules: [], 45 | exams: [], 46 | departments: [], 47 | languages: [], 48 | search: "", 49 | }; 50 | 51 | export const queryStore = writableSession("filters", emptyQuery); 52 | 53 | export function clearAll() { 54 | // Cause the checkboxes to update 55 | queryStore.update((store) => { 56 | store.blocks = []; 57 | store.degrees = []; 58 | store.schedules = []; 59 | store.exams = []; 60 | store.departments = []; 61 | store.languages = []; 62 | store.search = ""; 63 | return store; 64 | }); 65 | } 66 | 67 | // API URL: prod fallback during SSR, local dev server on localhost, same-origin otherwise. 68 | export function apiUrl() { 69 | // check that window is defined, this is used for checking if we are running in the browser 70 | if (typeof window === "undefined") { 71 | return "https://kucourses.dk/api"; // SSR 72 | } 73 | 74 | const hostname = window.location.hostname; 75 | if (hostname == "localhost") { 76 | return "http://localhost:3000/api"; 77 | } 78 | 79 | // if running on another host, assume we are in prod 80 | return "https://" + hostname + "/api"; 81 | } 82 | 83 | function xorString(str: string, key: number): string { 84 | return str 85 | .split("") 86 | .map((char) => String.fromCharCode(char.charCodeAt(0) ^ key)) 87 | .join(""); 88 | } 89 | // mail
obfuscator/deobfuscator using XOR, this should return a function with no arguments that returns a string 91 | export function obfuscateEmail(email: string): () => string { 92 | // generate the key by summing the char codes of the email and mod 256 93 | const key = 94 | email.split("").reduce((acc, char) => acc + char.charCodeAt(0), 0) % 95 | 256; 96 | const obfuscated = xorString(email, key); 97 | return () => xorString(obfuscated, key); 98 | } 99 | -------------------------------------------------------------------------------- /frontend/tailwind.config.js: -------------------------------------------------------------------------------- 1 | /** @type {import('tailwindcss').Config} */ 2 | export default { 3 | content: ["./src/**/*.{html,js,svelte,ts}"], 4 | theme: { 5 | extend: { 6 | spacing: { 7 | text: "clamp(45ch,50%,75ch)", 8 | }, 9 | listStyleType: { square: "square" }, 10 | colors: { 11 | kuRed: "#901A1E", 12 | kuGray: "#333333", 13 | darkGray: "#2b2d41", 14 | dulledWhite: "#F4F5F7", 15 | greyedOut: "#8D99AD", 16 | brand: { 17 | 100: "#270102", 18 | 200: "#370002", 19 | 300: "#630307", 20 | 400: "#780D10", 21 | 500: "#901A1E", 22 | 600: "#B84044", 23 | 700: "#D27275", 24 | 800: "#E5A3A5", 25 | 900: "#FCEBEC", 26 | }, 27 | neutral: { 28 | 100: "#03080E", 29 | 200: "#101E2D", 30 | 300: "#1A2A39", 31 | 400: "#273441", 32 | 500: "#3A4550", 33 | 600: "#7B7E81", 34 | 700: "#C2C2C2", 35 | 800: "#F2EFEF", 36 | 900: "#FFFFFF", 37 | }, 38 | green: { 39 | 100: "#013100", 40 | 200: "#026200", 41 | 300: "#049001", 42 | 400: "#0FBC0C", 43 | 500: "#24D921", 44 | 600: "#3EEE3B", 45 | 700: "#71FF6F", 46 | 800: "#88FF86", 47 | 900: "#AEFFAC", 48 | }, 49 | orange: { 50 | 100: "#302300", 51 | 200: "#624600", 52 | 300: "#906801", 53 | 400: "#BC8A0C", 54 | 500: "#D9A521", 55 | 600: "#EEBB3B", 56 | 700: "#FFD66F", 57 | 800: "#FFDD86", 58 | 900: "#FFE8AC", 59 | }, 60 | red: { 61 | 100: "#300000", 62 | 200: "#620000", 63 | 300: "#900101", 64 | 400: "#BC0C0C", 65 | 500: "#D92121", 66 
| 600: "#EE3B3B", 67 | 700: "#FF6F6F", 68 | 800: "#FF8686", 69 | 900: "#FFACAC", 70 | }, 71 | blue: { 72 | 100: "#001330", 73 | 200: "#002762", 74 | 300: "#013B90", 75 | 400: "#0C52BC", 76 | 500: "#216AD9", 77 | 600: "#3B82EE", 78 | 700: "#6FA8FF", 79 | 800: "#86B6FF", 80 | 900: "#ACCDFF", 81 | }, 82 | }, 83 | }, 84 | keyframes: { 85 | fadeIn: { 86 | "0%": { opacity: "0" }, 87 | "100%": { opacity: "1" }, 88 | }, 89 | }, 90 | animation: { 91 | fadeIn: "fadeIn 0.5s ease-in-out", 92 | }, 93 | }, 94 | }; 95 | -------------------------------------------------------------------------------- /backend/db-manager/src/statistics/utils.clj: -------------------------------------------------------------------------------- 1 | (ns statistics.utils) 2 | 3 | (defn transform-obj 4 | "Convert {\" grade \": \" 12 \", \" count \": 13...} to {\" 12 \" 13...}" 5 | [obj] 6 | (into {} (map (fn [x] {(:grade x) (:count x)}) obj))) 7 | 8 | (def passing-grades ["Passed" "12" "10" "7" "4" "02"]) 9 | (def failing-grades ["00" "-3" "Failed" "Absent"]) 10 | (def grade-steps ["12" "10" "7" "4" "02" "00" "-3"]) 11 | 12 | (defn grade-repeats 13 | "Repeat the grade-steps the number of times they appear in the exam-table, 14 | for instance if 7 appears 3 times, we repeat 7 three times, this is a hack to calculate statistics" 15 | [exam-table] 16 | (let [transformed (transform-obj exam-table) 17 | grades (select-keys transformed grade-steps)] 18 | (apply concat (map (fn [x] (repeat (transformed x) (Integer/parseInt x))) (keys grades))))) 19 | 20 | ; if the sum of all the 7 grades is 0 then we can assume the course is a pass/fail course 21 | ; and not a graded course, some pass/fail courses have the 7 grades in them as all zeros 22 | (defn is-pass-fail? 
[exam-table] 23 | ; select the grades from the exam table that are in the 7 step scale 24 | (let [grades (select-keys (transform-obj exam-table) grade-steps)] 25 | ; some weird courses like LNAK10082E have a single graded thing and otherwise pass 26 | (> 5 (apply + (vals grades))))) 27 | 28 | (defn total [exam-table] 29 | (apply + (vals (transform-obj exam-table)))) 30 | 31 | (defn pass-total [exam-table] 32 | (let [grades (select-keys (transform-obj exam-table) passing-grades)] 33 | (apply + (vals grades)))) 34 | 35 | (defn fail-total [exam-table] 36 | (let [grades (select-keys (transform-obj exam-table) failing-grades)] 37 | (apply + (vals grades)))) 38 | 39 | (defn pass-rate [exam-table] 40 | (let [total-pass (pass-total exam-table) 41 | total-fail (fail-total exam-table)] 42 | (/ total-pass (+ total-pass total-fail)))) 43 | 44 | ; Median of the numeric 7-step grades; returns the middle grade for an odd count, 45 | ; the average of the two middle grades (possibly a ratio) for an even count. 46 | (defn median [exam-table] 47 | (let [sorted-grades (sort (grade-repeats exam-table)) 48 | total-count (count sorted-grades)] 49 | ; letfn keeps the index helper local; the previous nested defn re-def'ed a 50 | ; namespace-level var on every call (not thread-safe). quot gives an integer 51 | ; index; (/ odd-count 2) would produce a Ratio where an index is expected. 52 | (letfn [(nth-elem [n] (nth sorted-grades n))] 53 | (if (odd? total-count) 54 | (nth-elem (quot total-count 2)) 55 | (/ (+ (nth-elem (quot total-count 2)) (nth-elem (dec (quot total-count 2)))) 2))))) 56 | 57 | (defn stats-pass-fail [exam-table] 58 | {:pass-rate-w-absent (pass-rate exam-table) 59 | 60 | ; this calculates the pass-rate without the absent students 61 | ; (it will be higher than the pass-rate with absent students) 62 | :pass-rate (pass-rate (filter (fn [x] (not= (:grade x) "Absent")) exam-table)) 63 | :total (total exam-table) 64 | :pass (pass-total exam-table) 65 | :fail (fail-total exam-table) 66 | :absent ((transform-obj exam-table) "Absent")}) 67 | 68 | (defn squared-diff [x mean] 69 | (* (- x mean) (- x mean))) 70 | 71 | (defn stats-graded [exam-table] 72 | (let [repeats (grade-repeats exam-table) 73 | sum (reduce + repeats) 74 | total (count repeats) 75 | mean (/ sum total) 76 | var (/ (reduce + (map (fn [x] (squared-diff x mean)) repeats)) (- total 1))] 77 | {:mean mean 78 | :median (median exam-table) 79 | :var var 80 | :grades exam-table})) 81 | 82 | (defn stats [exam-table] 83 | (if (is-pass-fail? exam-table) 84 | (assoc (stats-pass-fail exam-table) :graded false) 85 | (assoc (merge (stats-pass-fail exam-table) (stats-graded exam-table)) :graded true))) 86 | -------------------------------------------------------------------------------- /frontend/src/components/Changelog/ChangelogModal.svelte: -------------------------------------------------------------------------------- 1 | 67 | 68 | {#if $modalStore} 69 | 73 |
76 |
79 |

Changelog

80 | 83 |
84 |
    85 | {#each changelogItems.reverse() as { date, changes }} 86 |
  • 87 |

    {date}

    88 |
      89 | {#each changes as change} 90 |
    • {change}
    • 91 | {/each} 92 |
    93 |
  • 94 | {/each} 95 |
96 |
97 |
98 | {/if} 99 | -------------------------------------------------------------------------------- /frontend/src/course.ts: -------------------------------------------------------------------------------- 1 | // TYPES FOR COURSE 2 | // TODO: make workload an enum 3 | export type Workload = { 4 | hours: number; 5 | type: string; 6 | }; 7 | export type Employee = { 8 | full_name: string; 9 | email: string; 10 | }; 11 | export type Schedule = { 12 | type: string; 13 | }; 14 | 15 | export type Block = { 16 | type: string; 17 | }; 18 | 19 | export type Language = { 20 | name: string; 21 | }; 22 | 23 | export type Description = { 24 | // TODO: rename type and string since it is a reserved keyword 25 | type: string; 26 | string: string; 27 | }; 28 | 29 | export type Exam = { 30 | duration: number; 31 | type: string; 32 | }; 33 | 34 | export type Degree = { 35 | type: string; 36 | }; 37 | 38 | export type Department = { 39 | name: string; 40 | }; 41 | 42 | export type Faculty = { 43 | name: string; 44 | }; 45 | 46 | export type Coordinator = { 47 | name: string; 48 | email: string; 49 | }; 50 | 51 | export type Grade = { 52 | grade: string; 53 | count: number; 54 | }; 55 | 56 | export type Statistics = { 57 | grades: Grade[]; 58 | fail: number; 59 | mean: number; 60 | median: number; 61 | pass: number; 62 | absent: number; 63 | "pass-rate": number; 64 | total: number; 65 | }; 66 | 67 | export type Course = { 68 | department: Department[]; 69 | schedule: Schedule[]; 70 | block: Block[]; 71 | content: string; 72 | "learning-outcome": string; 73 | duration: string; 74 | faculty: Faculty[]; 75 | title: string; 76 | statistics: Statistics | null; 77 | ects: number; 78 | coordinator: Coordinator[]; 79 | language: Language[]; 80 | exam: Exam[]; 81 | id: string; 82 | degree: Degree[]; 83 | "recommended-qualifications": string; 84 | workload: Workload[]; 85 | }; 86 | 87 | export const empty_course: Course = { 88 | department: [], 89 | schedule: [], 90 | block: [], 91 | content: "", 
92 | "learning-outcome": "", 93 | duration: "", 94 | faculty: [], 95 | title: "", 96 | statistics: null, 97 | ects: 0, 98 | coordinator: [], 99 | language: [], 100 | exam: [], 101 | id: "", 102 | degree: [], 103 | "recommended-qualifications": "", 104 | workload: [], 105 | }; 106 | 107 | export function total_hours(course: Course): number { 108 | let total = 0; 109 | course.workload.forEach((workload) => { 110 | total += workload.hours; 111 | }); 112 | return total; 113 | } 114 | 115 | // Same as course but removed the employees and workloads and desc is just a string 116 | export type Overview = { 117 | schedule: Schedule[]; 118 | block: Block[]; 119 | title: string; 120 | statistics: StatisticsOverview | null; 121 | summary: string; 122 | ects: number; 123 | language: Language[]; 124 | exam: Exam[]; 125 | id: string; 126 | degree: Degree[]; 127 | }; 128 | 129 | export type StatisticsOverview = { 130 | mean: number; 131 | median: number; 132 | "pass-rate": number; 133 | }; 134 | 135 | export const empty_overview: Overview = { 136 | schedule: [], 137 | block: [], 138 | title: "", 139 | statistics: null, 140 | summary: "", 141 | ects: 0, 142 | language: [], 143 | exam: [], 144 | id: "", 145 | degree: [], 146 | }; 147 | 148 | // Type for query store 149 | export interface Filters { 150 | blocks: string[]; 151 | degrees: string[]; 152 | schedules: string[]; 153 | exams: string[]; 154 | departments: string[]; 155 | languages: string[]; 156 | search: string; 157 | } 158 | -------------------------------------------------------------------------------- /flake.lock: -------------------------------------------------------------------------------- 1 | { 2 | "nodes": { 3 | "flake-utils": { 4 | "inputs": { 5 | "systems": "systems" 6 | }, 7 | "locked": { 8 | "lastModified": 1681202837, 9 | "narHash": "sha256-H+Rh19JDwRtpVPAWp64F+rlEtxUWBAQW28eAi3SRSzg=", 10 | "owner": "numtide", 11 | "repo": "flake-utils", 12 | "rev": "cfacdce06f30d2b68473a46042957675eebb3401", 13 | "type": 
"github" 14 | }, 15 | "original": { 16 | "owner": "numtide", 17 | "repo": "flake-utils", 18 | "type": "github" 19 | } 20 | }, 21 | "flake-utils_2": { 22 | "inputs": { 23 | "systems": "systems_2" 24 | }, 25 | "locked": { 26 | "lastModified": 1681202837, 27 | "narHash": "sha256-H+Rh19JDwRtpVPAWp64F+rlEtxUWBAQW28eAi3SRSzg=", 28 | "owner": "numtide", 29 | "repo": "flake-utils", 30 | "rev": "cfacdce06f30d2b68473a46042957675eebb3401", 31 | "type": "github" 32 | }, 33 | "original": { 34 | "owner": "numtide", 35 | "repo": "flake-utils", 36 | "type": "github" 37 | } 38 | }, 39 | "nixpkgs": { 40 | "locked": { 41 | "lastModified": 1683408522, 42 | "narHash": "sha256-9kcPh6Uxo17a3kK3XCHhcWiV1Yu1kYj22RHiymUhMkU=", 43 | "owner": "NixOS", 44 | "repo": "nixpkgs", 45 | "rev": "897876e4c484f1e8f92009fd11b7d988a121a4e7", 46 | "type": "github" 47 | }, 48 | "original": { 49 | "owner": "NixOS", 50 | "ref": "nixos-unstable", 51 | "repo": "nixpkgs", 52 | "type": "github" 53 | } 54 | }, 55 | "nixpkgs_2": { 56 | "locked": { 57 | "lastModified": 1681358109, 58 | "narHash": "sha256-eKyxW4OohHQx9Urxi7TQlFBTDWII+F+x2hklDOQPB50=", 59 | "owner": "NixOS", 60 | "repo": "nixpkgs", 61 | "rev": "96ba1c52e54e74c3197f4d43026b3f3d92e83ff9", 62 | "type": "github" 63 | }, 64 | "original": { 65 | "owner": "NixOS", 66 | "ref": "nixpkgs-unstable", 67 | "repo": "nixpkgs", 68 | "type": "github" 69 | } 70 | }, 71 | "root": { 72 | "inputs": { 73 | "flake-utils": "flake-utils", 74 | "nixpkgs": "nixpkgs", 75 | "rust-overlay": "rust-overlay" 76 | } 77 | }, 78 | "rust-overlay": { 79 | "inputs": { 80 | "flake-utils": "flake-utils_2", 81 | "nixpkgs": "nixpkgs_2" 82 | }, 83 | "locked": { 84 | "lastModified": 1703384182, 85 | "narHash": "sha256-g5K8bFBCIQ3x/j/MFTpkZo4It5SGWPwhBp/lASiy+pA=", 86 | "owner": "oxalica", 87 | "repo": "rust-overlay", 88 | "rev": "cb6395cb3c2f69ad028914c90bce833e51d339c9", 89 | "type": "github" 90 | }, 91 | "original": { 92 | "owner": "oxalica", 93 | "repo": "rust-overlay", 94 | "type": "github" 
95 | } 96 | }, 97 | "systems": { 98 | "locked": { 99 | "lastModified": 1681028828, 100 | "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=", 101 | "owner": "nix-systems", 102 | "repo": "default", 103 | "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e", 104 | "type": "github" 105 | }, 106 | "original": { 107 | "owner": "nix-systems", 108 | "repo": "default", 109 | "type": "github" 110 | } 111 | }, 112 | "systems_2": { 113 | "locked": { 114 | "lastModified": 1681028828, 115 | "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=", 116 | "owner": "nix-systems", 117 | "repo": "default", 118 | "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e", 119 | "type": "github" 120 | }, 121 | "original": { 122 | "owner": "nix-systems", 123 | "repo": "default", 124 | "type": "github" 125 | } 126 | } 127 | }, 128 | "root": "root", 129 | "version": 7 130 | } 131 | -------------------------------------------------------------------------------- /backend/db-manager/src/db_manager/routes.clj: -------------------------------------------------------------------------------- 1 | (ns db-manager.routes 2 | (:require [clojure.spec.alpha :as s] 3 | [db-manager.db :refer [get-course-ids 4 | get-course-by-id 5 | get-courses]] 6 | [clojure.data.json :as json] 7 | [db-manager.cache :refer [cache]] 8 | [org.httpkit.client :as http])) 9 | 10 | ; TODO: fix code duplication, this also apperas in core.clj 11 | 12 | (def data-dir "../../data/") 13 | (def json-dir (str data-dir "json/")) 14 | (def stats-dir (str data-dir "statistics/")) 15 | 16 | (defn try-finding-stats [course-id] 17 | (try 18 | ; stats file is in stats-dir 19 | (let [stats-file (str stats-dir course-id ".json")] 20 | (json/read-str (slurp stats-file))) 21 | (catch Exception e 22 | nil))) 23 | 24 | (def ping-route 25 | ["/ping" 26 | {:name :ping 27 | :get (fn [_] 28 | {:status 200 29 | :body "pong"})}]) 30 | 31 | (defn api-routes [db] 32 | [["/get-all-course-ids" {:get {:parameters {} 33 | :responses {200 {:body 
[string?]}} 34 | :handler (fn [_] 35 | {:status 200 36 | :body (get-course-ids db)})}}] 37 | 38 | ; This route is used by the /course/:id route in the frontend, it returns a more detailed course 39 | ["/get-detailed-course-info" {:get {:parameters {:query {:id string?}} 40 | :responses {200 {:body map?}} 41 | :summary "Get a course by its id" 42 | :description "Returns a course with the given id" 43 | :handler (fn [{{{:keys [id]} :query} :parameters}] 44 | {:status 200 45 | :body (get-course-by-id db id)})}}] 46 | 47 | ; Better echo route, not used 48 | ["/echo" {:post {:parameters {:body map?} 49 | :handler (fn [request] 50 | (let [body (-> request :parameters :body)] 51 | {:status 200 52 | :body body}))}}] 53 | 54 | ; This route is used by the root route in the frontend, it returns an overview of all matching courses 55 | ; we expect a map of keys with vectors 56 | ["/find-course-overviews" {:post {:parameters {:body map?} 57 | :handler (fn [request] 58 | (let [predicates (-> request :parameters :body)] 59 | {:status 200 60 | ; make get-courses a partial without the db argument 61 | :body (let [get-courses-partial (partial get-courses db) 62 | courses (cache predicates get-courses-partial)] 63 | {:count (count courses) 64 | :courses courses})}))}}] 65 | ["/run-get-on-link" {:post {:parameters {:body map?} 66 | :handler (fn [request] 67 | (let [body (-> request :parameters :body) 68 | link (get body :link)] 69 | (println link) 70 | (let [response @(http/get link)] 71 | (if (= (:status response) 200) 72 | (let [body (:body response)] 73 | {:status 200 74 | :body body}) 75 | (do 76 | (println response) 77 | (throw (Exception. 
"Request failed")))))))}}]]) 78 | -------------------------------------------------------------------------------- /backend/db-manager/src/db_manager/core.clj: -------------------------------------------------------------------------------- 1 | (ns db-manager.core 2 | (:require [clojure.core :as c] 3 | [muuntaja.core :as m] 4 | [reitit.ring :as ring] 5 | [reitit.coercion.spec] 6 | [reitit.ring.coercion :as rrc] 7 | [reitit.ring.middleware.muuntaja :as muuntaja] 8 | [reitit.ring.middleware.parameters :as parameters] 9 | [reitit.swagger-ui :as swagger-ui] 10 | [reitit.swagger :as swagger] 11 | [org.httpkit.server :refer [run-server]] 12 | [db-manager.routes :refer [ping-route api-routes]] 13 | [db-manager.db :refer [schema]] 14 | [course-scraper.watcher :refer [sitemap-watcher scrape-course]] 15 | [statistics.core :refer [stats-watcher]] 16 | [ring.middleware.cors :refer [wrap-cors]] 17 | [io.staticweb.rate-limit.storage :as storage] 18 | [io.staticweb.rate-limit.middleware :refer [wrap-rate-limit ip-rate-limit]] 19 | [datascript.core :as d]) 20 | (:gen-class)) 21 | 22 | (def conn (d/create-conn schema)) 23 | 24 | (def storage (storage/local-storage)) 25 | 26 | ; limit each IP to 1000 api calls per hour 27 | (def limit (ip-rate-limit :limit-id 1000 (java.time.Duration/ofHours 1))) 28 | (def rate-limit-config {:storage storage :limit limit}) 29 | 30 | (def data-dir "../../data/") 31 | (def json-dir (str data-dir "new_json/")) 32 | (def pages-dir "../../data/pages") 33 | 34 | ; https://andersmurphy.com/2022/03/27/clojure-removing-namespace-from-keywords-in-response-middleware.html 35 | (defn transform-keys 36 | [t coll] 37 | (clojure.walk/postwalk (fn [x] (if (map? x) (update-keys x t) x)) coll)) 38 | 39 | (defn remove-namespace-keywords-in-response-middleware [handler & _] 40 | (fn [req] 41 | (let [resp (handler req)] 42 | (cond-> resp 43 | (comp map? 
:body) (update :body 44 | (partial transform-keys 45 | (comp keyword name))))))) 46 | 47 | (defn app [] 48 | (ring/ring-handler 49 | (ring/router 50 | [["/api/swagger.json" 51 | {:get {:no-doc true 52 | :swagger {:info {:title "KU courses backend API"} 53 | :basePath "/"} ;; prefix for all paths 54 | :handler (swagger/create-swagger-handler)}}] 55 | ["/api" {:middleware [remove-namespace-keywords-in-response-middleware]} 56 | ping-route 57 | (api-routes conn)]] 58 | {:data {:coercion reitit.coercion.spec/coercion 59 | :muuntaja m/instance 60 | ; TODO: fix the CORS middleware, it seems to not work for Chromium 61 | :middleware [[wrap-cors 62 | :access-control-allow-origin [#".*"] 63 | :access-control-allow-methods [:get :post] 64 | :access-control-allow-headers #{"accept" 65 | "accept-encoding" 66 | "accept-language" 67 | "authorization" 68 | "content-type" 69 | "origin"}] 70 | 71 | #(wrap-rate-limit % rate-limit-config) 72 | parameters/parameters-middleware 73 | muuntaja/format-middleware 74 | rrc/coerce-exceptions-middleware 75 | rrc/coerce-request-middleware 76 | rrc/coerce-response-middleware]}}) 77 | (ring/routes 78 | (swagger-ui/create-swagger-ui-handler {:path "/api" 79 | :url "/api/swagger.json"}) 80 | (ring/create-default-handler)))) 81 | 82 | (def main-config {:port 3000}) 83 | (defn -main [& args] 84 | ; concurrently run sitemap-watcher scrape-course and stats-watcher so that they don't block the server 85 | (future (sitemap-watcher scrape-course conn)) 86 | ; catch any potential errors and print them from the stats-watcher 87 | (future (try 88 | (stats-watcher) 89 | (catch Exception e 90 | (println e)))) 91 | 92 | (println "Starting server on port " (:port main-config)) 93 | (run-server (app) main-config)) 94 | -------------------------------------------------------------------------------- /frontend/src/components/GradeGraph/GradeGraph.svelte: -------------------------------------------------------------------------------- 1 | 107 | 108 |
109 | 110 |
111 | -------------------------------------------------------------------------------- /backend/rust_parser/src/parser/exam_information.rs: -------------------------------------------------------------------------------- 1 | use crate::parser::Exam; 2 | use anyhow::{bail, ensure, Context, Result}; 3 | 4 | use tl::{NodeHandle, VDom}; 5 | 6 | pub fn parse_course_exams(dom: &VDom) -> Result> { 7 | let parser = dom.parser(); 8 | let exam_table = dom 9 | .get_element_by_id("course-exams1") 10 | .context("Unable to find exam table, this should never happen??? i think?")? 11 | .get(parser) 12 | .unwrap() 13 | .as_tag() 14 | .unwrap(); 15 | 16 | let dts = exam_table 17 | .query_selector(parser, "dt") 18 | .context("Unable to find any dts, this should be impossible")?; 19 | let dds = exam_table 20 | .query_selector(parser, "dd") 21 | .context("Unable to find any dds, this should be impossible")?; 22 | 23 | ensure!( 24 | dds.clone().count() == dts.clone().count(), 25 | "Number of dds and dts in exam table does not match" 26 | ); 27 | 28 | let mut exams = Vec::::new(); 29 | for (dt, dd) in dts.zip(dds) { 30 | let dt_text = dt.get(parser).unwrap().inner_text(parser).to_string(); 31 | match dt_text.as_str() { 32 | "Type of assessment" | "Prøveform" => { 33 | let exam_boundary = dd 34 | .get(parser) 35 | .unwrap() 36 | .children() 37 | .unwrap() 38 | .boundaries(parser) 39 | .unwrap(); 40 | for j in exam_boundary.0..exam_boundary.1 { 41 | let text = NodeHandle::new(j).get(parser).unwrap().inner_text(parser); 42 | exams.push(parse_text_to_exam(&text)?); 43 | } 44 | ensure!( 45 | !exams.is_empty(), 46 | format!( 47 | "No exams found in exam table: {}", 48 | dd.get(parser).unwrap().inner_text(parser) 49 | ) 50 | ); 51 | } 52 | _ => continue, 53 | } 54 | } 55 | if exams.len() > 1 && exams[0] == exams[1] { 56 | exams.remove(0); 57 | } 58 | Ok(exams) 59 | } 60 | 61 | fn parse_text_to_exam(text: &str) -> Result { 62 | let split = text.split(", ").collect::>(); 63 | let exam_minutes = 
if split.clone().len() == 1 { 64 | None 65 | } else { 66 | // the first chars in split[1] are a duration in numbers 67 | let number = split[1] 68 | .chars() 69 | // take while numeric or a dot 70 | .take_while(|c| c.is_numeric() || *c == '.') 71 | .collect::() 72 | .parse::() 73 | // convert error to Nothing type as number is an option type 74 | .ok(); 75 | 76 | let factor = match split[1] { 77 | _ if split[1].contains("min") => Some(1), 78 | _ if split[1].contains("hour") || split[1].contains("time") => Some(60), 79 | _ if split[1].contains("day") || split[1].contains("dag") => Some(60 * 24), 80 | _ => None, 81 | }; 82 | match (number, factor) { 83 | (None, _) => None, 84 | (_, None) => None, 85 | (Some(number), Some(factor)) => Some((number * factor as f32) as u32), 86 | } 87 | }; 88 | 89 | let exam_name = split[0].to_lowercase().to_string(); 90 | match exam_name { 91 | _ if exam_name.contains("aflevering") || exam_name.contains("assignment") => { 92 | Ok(Exam::Assignment(exam_minutes)) 93 | } 94 | _ if exam_name.contains("skriftlig prøve") 95 | || exam_name.contains("skriftlig stedprøve") 96 | || exam_name.contains("written exam") => 97 | { 98 | Ok(Exam::Written(exam_minutes)) 99 | } 100 | _ if exam_name.contains("mundtlig prøve") 101 | || exam_name.contains("mundtligt forsvar") 102 | || exam_name.contains("oral exam") => 103 | { 104 | Ok(Exam::Oral(exam_minutes)) 105 | } 106 | _ if exam_name.contains("portfolio") 107 | || exam_name.contains("other") 108 | || exam_name.contains("andet") => 109 | { 110 | Ok(Exam::Other) 111 | } 112 | _ if exam_name.contains("løbende bedømmelse") 113 | || exam_name.contains("continuous assessment") => 114 | { 115 | Ok(Exam::ContinuousAssessment) 116 | } 117 | _ => bail!("Not implemented for exam type: {}", split[0]), 118 | } 119 | } 120 | -------------------------------------------------------------------------------- /backend/rust_parser/Cargo.lock: -------------------------------------------------------------------------------- 
1 | # This file is automatically @generated by Cargo. 2 | # It is not intended for manual editing. 3 | version = 3 4 | 5 | [[package]] 6 | name = "aho-corasick" 7 | version = "1.1.1" 8 | source = "registry+https://github.com/rust-lang/crates.io-index" 9 | checksum = "ea5d730647d4fadd988536d06fecce94b7b4f2a7efdae548f1cf4b63205518ab" 10 | dependencies = [ 11 | "memchr", 12 | ] 13 | 14 | [[package]] 15 | name = "anyhow" 16 | version = "1.0.75" 17 | source = "registry+https://github.com/rust-lang/crates.io-index" 18 | checksum = "a4668cab20f66d8d020e1fbc0ebe47217433c1b6c8f2040faf858554e394ace6" 19 | 20 | [[package]] 21 | name = "diff" 22 | version = "0.1.13" 23 | source = "registry+https://github.com/rust-lang/crates.io-index" 24 | checksum = "56254986775e3233ffa9c4d7d3faaf6d36a2c09d30b20687e9f88bc8bafc16c8" 25 | 26 | [[package]] 27 | name = "itoa" 28 | version = "1.0.9" 29 | source = "registry+https://github.com/rust-lang/crates.io-index" 30 | checksum = "af150ab688ff2122fcef229be89cb50dd66af9e01a4ff320cc137eecc9bacc38" 31 | 32 | [[package]] 33 | name = "memchr" 34 | version = "2.6.4" 35 | source = "registry+https://github.com/rust-lang/crates.io-index" 36 | checksum = "f665ee40bc4a3c5590afb1e9677db74a508659dfd71e126420da8274909a0167" 37 | 38 | [[package]] 39 | name = "pretty_assertions" 40 | version = "1.4.0" 41 | source = "registry+https://github.com/rust-lang/crates.io-index" 42 | checksum = "af7cee1a6c8a5b9208b3cb1061f10c0cb689087b3d8ce85fb9d2dd7a29b6ba66" 43 | dependencies = [ 44 | "diff", 45 | "yansi", 46 | ] 47 | 48 | [[package]] 49 | name = "proc-macro2" 50 | version = "1.0.68" 51 | source = "registry+https://github.com/rust-lang/crates.io-index" 52 | checksum = "5b1106fec09662ec6dd98ccac0f81cef56984d0b49f75c92d8cbad76e20c005c" 53 | dependencies = [ 54 | "unicode-ident", 55 | ] 56 | 57 | [[package]] 58 | name = "quote" 59 | version = "1.0.33" 60 | source = "registry+https://github.com/rust-lang/crates.io-index" 61 | checksum = 
"5267fca4496028628a95160fc423a33e8b2e6af8a5302579e322e4b520293cae" 62 | dependencies = [ 63 | "proc-macro2", 64 | ] 65 | 66 | [[package]] 67 | name = "regex" 68 | version = "1.9.6" 69 | source = "registry+https://github.com/rust-lang/crates.io-index" 70 | checksum = "ebee201405406dbf528b8b672104ae6d6d63e6d118cb10e4d51abbc7b58044ff" 71 | dependencies = [ 72 | "aho-corasick", 73 | "memchr", 74 | "regex-automata", 75 | "regex-syntax", 76 | ] 77 | 78 | [[package]] 79 | name = "regex-automata" 80 | version = "0.3.9" 81 | source = "registry+https://github.com/rust-lang/crates.io-index" 82 | checksum = "59b23e92ee4318893fa3fe3e6fb365258efbfe6ac6ab30f090cdcbb7aa37efa9" 83 | dependencies = [ 84 | "aho-corasick", 85 | "memchr", 86 | "regex-syntax", 87 | ] 88 | 89 | [[package]] 90 | name = "regex-syntax" 91 | version = "0.7.5" 92 | source = "registry+https://github.com/rust-lang/crates.io-index" 93 | checksum = "dbb5fb1acd8a1a18b3dd5be62d25485eb770e05afb408a9627d14d451bae12da" 94 | 95 | [[package]] 96 | name = "rust_parser" 97 | version = "1.0.0" 98 | dependencies = [ 99 | "anyhow", 100 | "pretty_assertions", 101 | "regex", 102 | "serde", 103 | "serde_json", 104 | "tl", 105 | ] 106 | 107 | [[package]] 108 | name = "ryu" 109 | version = "1.0.15" 110 | source = "registry+https://github.com/rust-lang/crates.io-index" 111 | checksum = "1ad4cc8da4ef723ed60bced201181d83791ad433213d8c24efffda1eec85d741" 112 | 113 | [[package]] 114 | name = "serde" 115 | version = "1.0.188" 116 | source = "registry+https://github.com/rust-lang/crates.io-index" 117 | checksum = "cf9e0fcba69a370eed61bcf2b728575f726b50b55cba78064753d708ddc7549e" 118 | dependencies = [ 119 | "serde_derive", 120 | ] 121 | 122 | [[package]] 123 | name = "serde_derive" 124 | version = "1.0.188" 125 | source = "registry+https://github.com/rust-lang/crates.io-index" 126 | checksum = "4eca7ac642d82aa35b60049a6eccb4be6be75e599bd2e9adb5f875a737654af2" 127 | dependencies = [ 128 | "proc-macro2", 129 | "quote", 130 | "syn", 131 | 
]

[[package]]
name = "serde_json"
version = "1.0.107"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6b420ce6e3d8bd882e9b243c6eed35dbc9a6110c9769e74b584e0d68d1f20c65"
dependencies = [
 "itoa",
 "ryu",
 "serde",
]

[[package]]
name = "syn"
version = "2.0.38"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e96b79aaa137db8f61e26363a0c9b47d8b4ec75da28b7d1d614c2303e232408b"
dependencies = [
 "proc-macro2",
 "quote",
 "unicode-ident",
]

[[package]]
name = "tl"
version = "0.7.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d5e993a1c7c32fdf90a308cec4d457f507b2573acc909bd6e7a092321664fdb3"

[[package]]
name = "unicode-ident"
version = "1.0.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b"

[[package]]
name = "yansi"
version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "09041cd90cf85f7f8b2df60c646f853b7f535ce68f85244eb6731cf89fa498ec"
--------------------------------------------------------------------------------
/backend/rust_parser/src/parser/logistic_information.rs:
--------------------------------------------------------------------------------
use crate::parser;
use crate::parser::LogisticInformation;
use anyhow::{bail, ensure, Result};
use tl::{NodeHandle, VDom};

/// Convert a two-character hex string (e.g. "6a") into the byte it encodes.
///
/// Returns `None` when the input is shorter than two characters or contains
/// non-hex digits; the previous version `unwrap()`ed and panicked on such
/// input instead of letting the caller report a proper error.
fn double_hex_to_u8(hex: &str) -> Option<u8> {
    let mut chars = hex.chars();
    let high = chars.next()?.to_digit(16)? as u8;
    let low = chars.next()?.to_digit(16)? as u8;
    Some((high << 4) | low)
}

/// Recover a coordinator e-mail from KU's obfuscation scheme.
///
/// Obfuscated addresses look like `<junk>-<hex payload>`: every byte of the
/// real address is shifted up by a small constant before hex-encoding. The
/// constant is not published, so we brute-force offsets 0..25 and accept the
/// first decoding that matches an e-mail-shaped regex. Strings without a `-`
/// are assumed to be plain addresses and are returned unchanged.
///
/// # Errors
/// Fails when the payload is malformed (odd length or non-hex characters) or
/// when no offset yields something that looks like an e-mail.
fn deobfuscate_email(obfuscated_email: &str) -> Result<String> {
    // No '-' separator means the address is not obfuscated at all.
    if !obfuscated_email.contains('-') {
        return Ok(obfuscated_email.to_string());
    }
    // The hex payload is everything after the last '-'.
    let text = obfuscated_email.rsplit('-').next().unwrap(); // safe: '-' is present
    ensure!(
        text.len() % 2 == 0,
        "Odd-length hex payload in obfuscated email: {}",
        obfuscated_email
    );

    let regex = regex::Regex::new(r"(.+@.+\..+)").unwrap();
    let mut email = String::new();
    for offset in 0..25u8 {
        email.clear();
        for pair in text.as_bytes().chunks_exact(2) {
            let byte = std::str::from_utf8(pair)
                .ok()
                .and_then(double_hex_to_u8)
                .ok_or_else(|| {
                    anyhow::anyhow!("Non-hex payload in obfuscated email: {}", obfuscated_email)
                })?;
            // wrapping_sub: an over-large offset must not panic in debug
            // builds; the resulting garbage simply fails the regex below.
            email.push(byte.wrapping_sub(offset) as char);
        }
        if regex.is_match(&email) {
            return Ok(email);
        }
    }
    bail!("Unable to deobfuscate email: {}", obfuscated_email)
}

/// Parse departments, faculty and course coordinators out of the course page
/// DOM.
///
/// # Errors
/// Fails when a coordinator entry is malformed, the faculty is unknown or
/// missing, or no department can be found.
pub fn parse_logistic_info(dom: &VDom) -> Result<LogisticInformation> {
    // Extract the (h5 heading, list items) pairs from the dom.
    let info: Vec<(String, Vec<String>)> = extract_h5_li_pairs(dom)?;

    let mut departments: Vec<parser::Department> = vec![];
    let mut coordinators: Vec<parser::Coordinator> = vec![];
    let mut faculty: Option<parser::Faculty> = None;

    for (h5, lis) in &info {
        match h5.as_str() {
            "Kursusansvarlige" | "Course Coordinators" => {
                for li in lis {
                    // Entries look like "Name (obfuscated-email)"; split the
                    // parenthesised e-mail off the name. A missing '(' used to
                    // panic here; it is now reported as a parse error.
                    let mut split = li.split('(');
                    let name = split.next().unwrap().trim().to_string(); // split yields >= 1 item
                    let obfuscated_email = split
                        .next()
                        .ok_or_else(|| {
                            anyhow::anyhow!("Coordinator entry without e-mail: {}", li)
                        })?
                        .split(')')
                        .next()
                        .unwrap()
                        .to_string();
                    let email = deobfuscate_email(&obfuscated_email)?;
                    coordinators.push(parser::Coordinator { name, email });
                }
            }
            "Udbydende fakultet" | "Contracting faculty" => {
                let faculty_str = lis
                    .first()
                    .ok_or_else(|| anyhow::anyhow!("Empty faculty section: {}", h5))?;
                match faculty_str.as_str() {
                    "Det Natur- og Biovidenskabelige Fakultet" | "Faculty of Science" => {
                        faculty = Some(parser::Faculty::Science)
                    }
                    _ => bail!("Unknown faculty: {} ", faculty_str),
                }
            }
            _ if h5.contains("institut") || h5.contains("department") => {
                for li in lis {
                    departments.push(parser::Department::from_str(li)?);
                }
            }
            _ => {}
        }
    }
    // ensure we have at least one department
    ensure!(
        !departments.is_empty(),
        format!("No departments found in logistic information: {:?}", info)
    );
    // Previously `faculty.unwrap()` — a course page without a faculty section
    // now yields a proper error instead of panicking the whole parser run.
    let faculty = faculty.ok_or_else(|| {
        anyhow::anyhow!("No faculty found in logistic information: {:?}", info)
    })?;

    Ok(parser::LogisticInformation {
        departments,
        faculty,
        coordinators,
    })
}

pub fn extract_h5_li_pairs(dom: &VDom) -> Result<Vec<(String, Vec<String>)>> {
    let parser = dom.parser();

    let raw_panel_bodies = dom.get_elements_by_class_name("panel-body");
    let panel_bodies =
        raw_panel_bodies.map(|panel_body| panel_body.get(parser).unwrap().as_tag().unwrap());

    let mut pairs: Vec<(String, Vec<String>)> = vec![];
    for panel_body in panel_bodies {
        let h5s = panel_body.query_selector(parser, "h5").unwrap();
        // if it contains
h5s, we have found the right body 107 | if h5s.clone().count() > 0 { 108 | for h5 in h5s { 109 | let h5_text = h5.get(parser).unwrap().inner_text(parser).to_string(); 110 | 111 | let inner_handle = h5.get_inner(); // This is the handle to the h5 tag 112 | 113 | // by magic we know that offsetting by 2 gives us the ul tag 114 | let ul_handle = NodeHandle::new(inner_handle + 2).get(parser).unwrap(); 115 | 116 | // get the chldren inside of next_sibling 117 | let mut children = vec![]; 118 | for child in ul_handle.as_tag().unwrap().children().top().iter() { 119 | let child_text = child.get(parser).unwrap().inner_text(parser).to_string(); 120 | children.push(child_text); 121 | } 122 | pairs.push((h5_text, children)); 123 | } 124 | } 125 | } 126 | Ok(pairs) 127 | } 128 | -------------------------------------------------------------------------------- /backend/vector_store/src/embedding.rs: -------------------------------------------------------------------------------- 1 | use super::{Coordinator, Course}; 2 | 3 | use anyhow::Result; 4 | use async_stream::stream; 5 | use fastembed::{Embedding, EmbeddingModel, InitOptions, TextEmbedding}; 6 | use futures_core::stream::Stream; 7 | use rayon::prelude::*; 8 | 9 | const BATCH_SIZE: usize = 32; 10 | 11 | /// Embedding for a course 12 | #[derive(Clone)] 13 | pub struct CourseEmbedding { 14 | pub id: String, 15 | pub title: Embedding, 16 | pub content: Embedding, 17 | } 18 | 19 | /// Embedding for a coordinator 20 | #[derive(Clone)] 21 | pub struct CoordinatorEmbedding { 22 | pub email: String, 23 | pub name: Embedding, 24 | } 25 | 26 | /// Embedder for courses and coordinators 27 | pub struct Embedder { 28 | pub model: TextEmbedding, 29 | } 30 | 31 | impl Embedder { 32 | pub fn new() -> Self { 33 | let model: TextEmbedding = 34 | TextEmbedding::try_new(InitOptions::new(EmbeddingModel::AllMiniLML12V2Q)) 35 | .expect("Failed to load model, please check your internet connection"); 36 | Self { model } 37 | } 38 | 39 | /// Embeds a 
Vec into course embeddings 40 | /// This returns an asynchronous stream of CourseEmbedding 41 | pub fn embed_courses( 42 | &self, 43 | documents: Vec, 44 | ) -> impl Stream + '_ { 45 | stream! { 46 | for batch in documents.chunks(BATCH_SIZE) { 47 | let embedded_courses = embed_course_batch(batch.to_vec(), &self.model).expect("Failed to embed courses, this should not happen"); 48 | for embedded_course in embedded_courses.iter().cloned() { 49 | yield embedded_course; 50 | } 51 | println!("Embedded batch of courses"); 52 | } 53 | } 54 | } 55 | 56 | /// Embeds a Vec into coordinator embeddings 57 | /// This returns an asynchronous stream of CoordinatorEmbedding 58 | pub fn embed_coordinators( 59 | &self, 60 | coordinators: Vec, 61 | ) -> impl Stream + '_ { 62 | stream! { 63 | for batch in coordinators.chunks(BATCH_SIZE) { 64 | let model = &self.model; 65 | let embedded_coordinators = embed_coordinator_batch( 66 | batch.to_vec(), 67 | model 68 | ).expect("Failed to embed coordinators, this should not happen"); 69 | for embedded_coordinator in embedded_coordinators.iter().cloned() { 70 | yield embedded_coordinator; 71 | } 72 | println!("Embedded batch of coordinators"); 73 | } 74 | } 75 | } 76 | 77 | // Embeds a query into an embedding 78 | // This returns an Embedding 79 | pub fn embed_query(&self, query: String) -> Embedding { 80 | query_embed(&query, &self.model).expect("Failed to embed query, this should not happen") 81 | } 82 | } 83 | 84 | /// Helper function to embed a batch of courses 85 | /// This returns a Vec 86 | fn embed_course_batch(courses: Vec, model: &TextEmbedding) -> Result> { 87 | let batch_size = Some(32); 88 | 89 | let embedded_titles = passage_embed( 90 | courses.par_iter().map(|x| x.title.clone()).collect(), 91 | model, 92 | batch_size, 93 | )?; 94 | 95 | let embedded_descriptions = passage_embed( 96 | courses.par_iter().map(|x| x.content.clone()).collect(), 97 | model, 98 | batch_size, 99 | )?; 100 | 101 | let embedded_courses: Vec = courses 102 | 
.iter() 103 | .cloned() 104 | .zip(embedded_titles.to_vec()) 105 | .zip(embedded_descriptions.to_vec()) 106 | .map(|((course, title), content)| CourseEmbedding { 107 | id: course.id, 108 | title, 109 | content, 110 | }) 111 | .collect(); 112 | 113 | Ok(embedded_courses) 114 | } 115 | 116 | /// Helper function to embed a batch of coordinators 117 | /// This returns a Vec 118 | fn embed_coordinator_batch( 119 | coordinators: Vec, 120 | model: &TextEmbedding, 121 | ) -> Result> { 122 | let name_embeddings = passage_embed( 123 | coordinators.iter().map(|x| x.name.clone()).collect(), 124 | model, 125 | Some(32), 126 | )?; 127 | 128 | let coordinator_embeddings: Vec = coordinators 129 | .iter() 130 | .cloned() 131 | .zip(name_embeddings.iter().cloned()) 132 | .map(|(coordinator, name_embedding)| CoordinatorEmbedding { 133 | email: coordinator.email, 134 | name: name_embedding, 135 | }) 136 | .collect(); 137 | Ok(coordinator_embeddings) 138 | } 139 | 140 | /// Helper function to embed a list of passages 141 | /// Passages are prepended with "passage: " before being embedded 142 | /// This returns a Vec 143 | fn passage_embed( 144 | passages: Vec, 145 | model: &TextEmbedding, 146 | batch_size: Option, 147 | ) -> Result> { 148 | // for each passage, add passage: to the front of it 149 | let passages: Vec = passages 150 | .par_iter() 151 | .map(|x| format!("passage: {x}")) 152 | .collect(); 153 | model.embed(passages, batch_size) 154 | } 155 | 156 | /// Helper function to embed a query 157 | /// The query is prepended with "query: " before being embedded 158 | /// This returns an Embedding 159 | fn query_embed(query: &str, model: &TextEmbedding) -> Result { 160 | // add query: to the front of the query 161 | model 162 | .embed(vec![format!("query: {query}")], None) 163 | .map(|x| x[0].clone()) 164 | } 165 | -------------------------------------------------------------------------------- /backend/vector_store/src/main.rs: 
-------------------------------------------------------------------------------- 1 | use axum::extract::Query; 2 | use axum::extract::State; 3 | use axum::routing::get; 4 | use axum::{Json, Router}; 5 | use futures_util::pin_mut; 6 | use futures_util::stream::StreamExt; 7 | use serde::Deserialize; 8 | use sqlx::migrate; 9 | use std::env; 10 | use std::path::Path; 11 | use std::sync::Arc; 12 | 13 | mod db; 14 | use db::PostgresDB; 15 | 16 | mod populate; 17 | use populate::upsert_documents_from_path; 18 | 19 | mod embedding; 20 | use embedding::Embedder; 21 | 22 | #[derive(Clone)] 23 | struct Course { 24 | id: String, 25 | title: String, 26 | content: String, 27 | } 28 | 29 | #[derive(Debug, Deserialize, Clone)] 30 | struct Coordinator { 31 | name: String, 32 | email: String, 33 | } 34 | 35 | #[derive(Clone)] 36 | struct AppState { 37 | db: Arc, 38 | embedder: Arc, 39 | } 40 | 41 | #[derive(Debug, Deserialize)] 42 | struct SearchQuery { 43 | query: String, 44 | } 45 | 46 | /// Main function that starts the server 47 | /// This function initializes the database, runs the migrations, and starts the server 48 | /// A temporary functionality this server has is to populate the database with the documents 49 | /// in the new_json directory, this will be removed in the future 50 | /// The server has two endpoints: 51 | /// - /health: returns "healthy" if the server is running 52 | /// - /search: returns a list of course ids that most closely match the query 53 | /// 54 | /// The server also has two background tasks that run every 6 hours: 55 | /// - populate_coordinator_embeddings: updates the coordinator embeddings in the database 56 | /// - populate_course_embeddings: updates the course embeddings in the database 57 | /// These tasks use the embedder to generate the embeddings 58 | #[tokio::main] 59 | async fn main() { 60 | let conn_string = env::var("POSTGRES_URL").expect( 61 | "POSTGRES_URL not set, it should be in the format postgres://user:password@host/db", 62 | ); 
63 | 64 | let db = PostgresDB::new(&conn_string) 65 | .await 66 | .expect("Failed to create database"); 67 | migrate!("./migrations") 68 | .run(&db.pool) 69 | .await 70 | .expect("Failed to run migrations"); 71 | 72 | let data_dir = env::var("DATA_DIR").expect("DATA_DIR not set"); 73 | let new_json_dir = data_dir.to_owned() + "new_json/"; 74 | let path = Path::new(&new_json_dir); 75 | upsert_documents_from_path(&db, path) 76 | .await 77 | .expect("Failed to upsert documents from path into database"); 78 | 79 | let state = AppState { 80 | db: Arc::new(db), 81 | embedder: Arc::new(Embedder::new()), 82 | }; 83 | 84 | const SYNC_INTERVAL: u64 = 60 * 60 * 6; 85 | 86 | let coordinator_state = state.clone(); 87 | tokio::spawn(async move { 88 | loop { 89 | populate_coordinator_embeddings(&coordinator_state.db, &coordinator_state.embedder) 90 | .await; 91 | println!("done populating coordinator embeddings"); 92 | tokio::time::sleep(tokio::time::Duration::from_secs(SYNC_INTERVAL)).await; 93 | } 94 | }); 95 | 96 | let course_state = state.clone(); 97 | tokio::spawn(async move { 98 | loop { 99 | populate_course_embeddings(&course_state.db, &course_state.embedder).await; 100 | println!("done populating course embeddings"); 101 | tokio::time::sleep(tokio::time::Duration::from_secs(SYNC_INTERVAL)).await; 102 | } 103 | }); 104 | 105 | let app = Router::new() 106 | .route("/health", get(|| async { "healthy" })) 107 | .route("/search", get(search)) 108 | .with_state(state); 109 | let addr = env::var("SERVER_ADDRESS").expect("SERVER_ADDRESS must be set"); 110 | let port = env::var("SERVER_PORT").expect("SERVER_PORT must be set"); 111 | let listener = tokio::net::TcpListener::bind(&format!("{addr}:{port}")) 112 | .await 113 | .expect("Failed to bind to port"); 114 | println!("Listening on {addr}:{port}"); 115 | axum::serve(listener, app) 116 | .await 117 | .expect("Failed to start server, this should not happen"); 118 | } 119 | 120 | /// Search endpoint that takes a query parameter 
and returns a list of the course ids that 121 | /// most closely match the query 122 | async fn search( 123 | Query(query): Query, 124 | State(state): State, 125 | ) -> Json> { 126 | let query_embedding = state.embedder.embed_query(query.query); 127 | let db = &state.db; 128 | let ids = db 129 | .get_most_relevant_course_ids(&query_embedding) 130 | .await 131 | .expect("Failed to get most relevant course ids"); 132 | Json(ids) 133 | } 134 | 135 | /// Upserts the coordinator embeddings into the database using the coordinator information 136 | /// from the database and the embedder to generate the embeddings 137 | async fn populate_coordinator_embeddings(db: &PostgresDB, embedder: &Embedder) { 138 | let missing_coordinators = db 139 | .get_missing_embedding_email_names() 140 | .await 141 | .expect("Failed to get missing coordinators"); 142 | 143 | println!("missing coordinators: {}", missing_coordinators.len()); 144 | 145 | let embedding_stream = embedder.embed_coordinators(missing_coordinators); 146 | pin_mut!(embedding_stream); 147 | 148 | while let Some(embedded_coordinator) = embedding_stream.next().await { 149 | db.insert_coordinator_embedding(embedded_coordinator) 150 | .await 151 | .expect("Failed to insert coordinator embedding"); 152 | } 153 | } 154 | 155 | /// Upserts the course embeddings into the database using the course information 156 | /// from the database and the embedder to generate the embeddings 157 | async fn populate_course_embeddings(db: &PostgresDB, embedder: &Embedder) { 158 | let outdated_embeddings = db 159 | .get_outdated_embedding_course_ids() 160 | .await 161 | .expect("Failed to get outdated embeddings"); 162 | 163 | let outdated_courses: Vec = db 164 | .get_courses_by_ids(&outdated_embeddings) 165 | .await 166 | .expect("Failed to get courses by ids"); 167 | 168 | println!("missing documents: {}", outdated_courses.len()); 169 | 170 | let embedding_stream = embedder.embed_courses(outdated_courses); 171 | pin_mut!(embedding_stream); 172 
| 173 | while let Some(embedded_document) = embedding_stream.next().await { 174 | db.insert_course_embedding(embedded_document) 175 | .await 176 | .expect("Failed to insert course embedding"); 177 | } 178 | } 179 | -------------------------------------------------------------------------------- /backend/db-manager/src/course_scraper/watcher.clj: -------------------------------------------------------------------------------- 1 | (ns course-scraper.watcher 2 | (:require [clojure.zip :as zip] 3 | [clojure.xml :as xml] 4 | [clojure.java.io :as io] 5 | [org.httpkit.client :as http] 6 | [clojure.java.shell :as shell] 7 | [datascript.core :as d] 8 | [db-manager.db :refer [schema]] 9 | [exam-scraper.core :refer [get-itx-courses-from-dir patch-courses-w-itx]] 10 | [course-scraper.upsert :refer [try-finding-stats transactions-w-stats read-json-file]]) 11 | (:import (javax.net.ssl SSLEngine SSLParameters SNIHostName) 12 | (java.net URI)) 13 | 14 | (:gen-class)) 15 | 16 | (def data-dir "../../data/") 17 | (def pages-dir "../../data/pages") 18 | (def json-dir "../../data/new_json") 19 | (def exam-pdfs-dir "../../data/exam_pdfs") 20 | (def stats-dir (str data-dir "statistics/")) 21 | 22 | 23 | (defn grab-info-from-course [course] 24 | (let [content (:content course) 25 | loc (first (filter #(= (:tag %) :loc) content)) 26 | lastmod (first (filter #(= (:tag %) :lastmod) content)) 27 | ; convert YYYY-MM-DD to java.time 28 | ldt (java.time.LocalDate/parse (first (:content lastmod))) 29 | instant (.atStartOfDay ldt (java.time.ZoneId/of "Europe/Copenhagen")) 30 | timestamp (.toEpochSecond instant)] 31 | 32 | {:loc (first (:content loc)) 33 | :id (last (clojure.string/split (first (:content loc)) #"/")) 34 | :lastmod (first (:content lastmod)) 35 | :timestamp (* 1000 timestamp)})) 36 | 37 | (defn grab-mod-date 38 | "Grabs the modification date of the file with the course-id as name or 0 if it doesn't exist" 39 | [course-id] 40 | (let [file (io/file pages-dir (str course-id ".html"))] 
41 | (if (.exists file) 42 | (.lastModified file) 43 | 0))) 44 | 45 | 46 | (defn sitemap-watcher 47 | "Watches the course sitemap for last-mod newer than time" 48 | [callback conn] 49 | (let [newly-scraped (atom []) 50 | sitemap-course-ids (atom []) 51 | sitemap-url "https://kurser.ku.dk/sitemap.xml" 52 | sitemap-zipper (zip/xml-zip (xml/parse sitemap-url)) 53 | courses (-> sitemap-zipper 54 | zip/down 55 | ; skip the first element, which is the page index, then grab everything 56 | zip/right 57 | 58 | zip/rights)] 59 | 60 | 61 | ; for every course, grab mod date and check if it's newer than the file 62 | ; if it is, grab the info from the course and pass it to the callback 63 | (println "[course scraper]: Scraping courses") 64 | (doseq [course courses] 65 | (let [course-info (grab-info-from-course course) 66 | course-id (:id course-info) 67 | course-mod-date (grab-mod-date course-id) 68 | course-lastmod (:timestamp course-info)] 69 | (swap! sitemap-course-ids conj course-id) 70 | (when (> course-lastmod course-mod-date) 71 | (callback course-info newly-scraped)))) 72 | 73 | ; check if the sitemap lacks courses that are in the pages directory, if so, delete them 74 | (let [files (.list (io/file pages-dir))] 75 | (doseq [file files] 76 | (when-not (some #(= file (str % ".html")) @sitemap-course-ids) 77 | (println "[course scraper]: Deleting" file) 78 | (io/delete-file (io/file pages-dir file))))) 79 | 80 | ; go to sleep for 30 minutes and then do it again 81 | (println "[course scraper]: Finished scraping, going to sleep") 82 | (println "[course scraper]: Modified" (count @newly-scraped) "courses") 83 | 84 | (if-not (zero? 
(count @newly-scraped)) 85 | (let [result (future (shell/sh "rust_parser" pages-dir json-dir))] 86 | (println "[course parser] Running rust parser...") 87 | (println "[course parser] Parser stderr: " (:err @result)) 88 | (println "[course parser] Finished parsing courses")) 89 | 90 | (println "[course parser] No new courses, not running parser")) 91 | 92 | (println "[course scraper]: Updating database") 93 | (let [stats-finder #(try-finding-stats stats-dir %) 94 | ; FIXME: we already know whihc courses to take, this does extra work 95 | ; this currently takes all courses instead of updating the ones that are new 96 | courses (map read-json-file (drop 1 (file-seq (clojure.java.io/file json-dir)))) 97 | itx-course-ids (get-itx-courses-from-dir exam-pdfs-dir) 98 | patched-courses (patch-courses-w-itx courses itx-course-ids)] 99 | ; FIXME: this is a hack and we should just drop the workflows and exams 100 | (d/reset-conn! conn (d/empty-db schema)) 101 | 102 | (d/transact! conn (transactions-w-stats stats-finder patched-courses))) 103 | (println "[course scraper]: Finished updating database") 104 | 105 | (reset! newly-scraped []) 106 | (Thread/sleep (* 1000 60 60)) ;; 1 hour, unit is ms 107 | (recur callback conn))) 108 | 109 | ; Magical snippet of code that allows us to use SNI with http-kit 110 | ; https://kumarshantanu.medium.com/using-server-name-indication-sni-with-http-kit-client-f7d92954e165 111 | (defn sni-configure 112 | [^SSLEngine ssl-engine ^URI uri] 113 | (let [^SSLParameters ssl-params (.getSSLParameters ssl-engine)] 114 | (.setServerNames ssl-params [(SNIHostName. 
(.getHost uri))]) 115 | (.setSSLParameters ssl-engine ssl-params))) 116 | 117 | (def client (http/make-client {:ssl-configurer sni-configure})) 118 | (def options {:client client :timeout (* 1000 60 5)}) 119 | 120 | (defn scrape-course 121 | "Scrapes the course page and writes it to disk, the 300ms sleep is to avoid DOSing KU" 122 | [course newly-scraped] 123 | (let [loc (:loc course)] 124 | (println "[course scraper]: Scraping" loc) 125 | (http/get loc options 126 | (fn [{:keys [status headers body error]}] ;; asynchronous response handling 127 | (if error 128 | (println "[course scraper]: Failed, exception is " error) 129 | (do 130 | (println "[course scraper]: Writing " loc) 131 | (spit (str pages-dir "/" (:id course) ".html") body) 132 | (swap! newly-scraped conj course)))))) 133 | (Thread/sleep 300)) 134 | 135 | (defn generate-url-combinations 136 | "KU has not given us any useful API and since the exams don't always correspond to the course's block 137 | we have to generate all combinations of Summer/Winter and the years from now to 2020" 138 | [course-id] 139 | (let [base-url "https://karakterstatistik.stads.ku.dk/Histogram/"] 140 | ; generate all combinations of year from now to 2020 and semester (summer, winter) 141 | (for [year (range (.getYear (java.time.LocalDate/now)) 2020 -1) 142 | semester ["Summer" "Winter"]] 143 | (str base-url course-id "/" semester "-" year)))) 144 | -------------------------------------------------------------------------------- /backend/db-manager/src/statistics/core.clj: -------------------------------------------------------------------------------- 1 | (ns statistics.core 2 | (:import (org.jsoup Jsoup)) 3 | (:require 4 | [clojure.data.json :as json] 5 | [clojure.java.io :as io] 6 | [clojure.string :as str] 7 | [statistics.utils :refer [stats]] 8 | [clj-http.client :as client]) 9 | (:gen-class)) 10 | 11 | (def data-dir "../../data/") 12 | (def json-dir (str data-dir "new_json/")) 13 | (def out-dir (str data-dir 
"statistics/")) 14 | 15 | 16 | (defn parse-block [block] 17 | (case block 18 | "One" 1 19 | "Two" 2 20 | "Three" 3 21 | "Four" 4 22 | "Summer" 5 23 | 0)) 24 | 25 | ; take a list of blocks in strings "One", "Two", "Three", "Four" and find the smallest 26 | (defn get-first-block [blocks] 27 | (->> blocks 28 | (map parse-block) 29 | (apply min))) 30 | 31 | (defn read-json 32 | "Read a json file and return the data as a map" 33 | [file] 34 | (let [old-course (json/read-str (slurp (str json-dir file)) :key-fn keyword) 35 | temp (assoc old-course :course-id (get-in old-course [:info :id]))] 36 | (assoc temp :start-block (get-first-block (get-in old-course [:info :block]))))) 37 | 38 | 39 | (defn query-stads 40 | "This should make a POST request with a form to the stats website, this returns a html table or nil" 41 | [course-info] 42 | (let [searchText (str/trim (str/replace (:title course-info) #"\(.*\)" "")) 43 | block (str "B" (:start-block course-info)) 44 | url "https://karakterstatistik.stads.ku.dk/Search/Courses" 45 | response (client/post url {:form-params 46 | {:searchText searchText 47 | :block block 48 | ; This number corresponds to the faculty of science 49 | :faculty "1868"} 50 | })] 51 | (if (= 200 (:status response)) 52 | (:body response) 53 | (do 54 | (println "[statistics] Error fetching: " url) 55 | (println "[statistics] Status code: " (:status response)) 56 | nil)))) 57 | 58 | 59 | (defn grab-urls [query-html] ; we get a table, the second row contains two tds, the second contains several a tags) 60 | (let [table (-> (str query-html) 61 | Jsoup/parse 62 | (.getElementsByClass "searchResultTable") 63 | first 64 | (.getElementsByTag "tr"))] 65 | ; check the number of trs, if there is less than 2, we return nil 66 | (if (= 1 (count table)) 67 | nil 68 | (let [second-row (second table) 69 | tds (.getElementsByTag second-row "td")] 70 | (map (fn [a] (.attr a "href")) (.getElementsByTag (second tds) "a")))))) 71 | 72 | (println (grab-urls (query-stads {:title 
"Dyrs livsformer og funktion"}))) 73 | 74 | 75 | ; HOW TO GENERATE THE COURSE STATISTICS PAGE URL: 76 | ; start with base https://karakterstatistik.stads.ku.dk/Histogram/ 77 | ; add the course-id which also exists in each course map 78 | ; the course ID has a "U" at the end, this has to be changed to an "E" for exams 79 | ; add semester which is "Winter" or "Summer" 80 | ; add year which is the year of the exam 81 | ; EXAMPLE: Advanced Algorithms and Data Structures (AADS) 82 | ; NDAA09023U - SCIENCE 83 | ; => 84 | ; https://karakterstatistik.stads.ku.dk/Histogram/NDAA09023E/Winter-2022 85 | (defn generate-url-combinations [course-id] 86 | (let [base-url "https://karakterstatistik.stads.ku.dk/Histogram/" 87 | ; The courses end with a U, but the exams end with an E 88 | exam-name (if (= \U (last course-id)) 89 | (str/replace course-id "U" "E") 90 | course-id)] 91 | ; Generate all combinations of year from now to 2020 and semester (summer, winter) 92 | (for [year (range (.getYear (java.time.LocalDate/now)) 2020 -1) 93 | semester ["Summer" "Winter"]] 94 | {:url (str base-url exam-name "/" semester "-" year) 95 | :course-id course-id 96 | :year year}))) 97 | 98 | (defn try-scraping 99 | "Tries to scrape the given url and returns nil if it fails, 100 | if the error code is 500 it returns nil, otherwise it throws an exception" 101 | [url] 102 | (println "[statistics] Trying: " url) 103 | (try (.get (Jsoup/connect url)) 104 | (catch Exception e 105 | (let [status (.getStatusCode e)] 106 | (if (= 500 status) 107 | nil 108 | (do 109 | (println "[statistics] Error fetching: " url) 110 | (println "[statistics] Status code: " status) 111 | (throw e))))))) 112 | 113 | (defn existing-json? 
[course-info] 114 | (let [file (io/file (str out-dir (:course-id course-info) ".json"))] 115 | (if (.exists file) 116 | (let [data (json/read (io/reader file))] 117 | (if (not= (:year data) "2023") 118 | false 119 | (= (:re-exam data) nil))) 120 | true))) 121 | ; find all jsons 122 | ; TODO: refactor this since we arent using the start block anymore 123 | (def course-infos-init (for [file (file-seq (io/file json-dir)) 124 | :when (.endsWith (.getName file) ".json")] 125 | (read-json (.getName file)))) 126 | ;(def course-infos-init [{:course-id "NNEB19009U"}]) 127 | 128 | (println "number of courses: " (count course-infos-init)) 129 | 130 | ; The exams don't ever change, so we only need to fetch them once 131 | ; TODO: this should not be filtering out courses that haven't had their re-exam yet 132 | (def course-infos (filter existing-json? course-infos-init)) 133 | 134 | ; Checks for colspan tag in html, which indicates that the table contains the exam data 135 | (defn contains-colspan? [elem] 136 | (let [attributes (.attributes elem)] 137 | (= "2" (.get attributes "colspan")))) 138 | ;TODO make sure both exam and reexam data is contained in HTML 139 | (defn fetch-html [html] 140 | (filter contains-colspan? (-> (str html) 141 | Jsoup/parse 142 | (.getElementsByTag "td")))) 143 | 144 | ; Check if the exam table exists 145 | (defn empty-exam? 
[table] 146 | (not (< (count (.getElementsByTag table "td")) 3))) 147 | 148 | (defn translate-grade [grade] 149 | (case (str/lower-case grade) 150 | "ej mødt" "Absent" 151 | "ikke bestået" "Failed" 152 | "bestået" "Passed" 153 | grade)) 154 | 155 | ; The exams are stored in html tables, where each row has three columns (grade, count, percentage) 156 | ; We only grab the count and grade 157 | (defn grade-count-reducer [grades-list three-elems] 158 | (conj grades-list {:grade (translate-grade (.text (first three-elems))) 159 | :count (Integer/parseInt (.text (second three-elems)))})) 160 | 161 | (defn fetch-data [table] 162 | (if (empty-exam? table) 163 | (reduce grade-count-reducer [] (partition 3 (-> (second (.getElementsByTag table "tbody")) 164 | (.getElementsByTag "td")))) 165 | nil)) 166 | 167 | (defn add-stats [exam-table] 168 | (when-not (nil? exam-table) 169 | (stats exam-table))) 170 | 171 | (defn build-stats-json [tables] 172 | (let [exam-table (first tables) 173 | re-exam-table (second tables)] 174 | {:exam (add-stats (fetch-data exam-table)) 175 | :re-exam (add-stats (fetch-data re-exam-table))})) 176 | 177 | (defn save-exam [tables course-id year] 178 | (spit (str out-dir course-id ".json") (json/write-str (assoc tables :course_id course-id :year year)))) 179 | 180 | (defn parse-to-tables [html] 181 | (build-stats-json (fetch-html (:html html)))) 182 | 183 | (defn spit-all-to-json [exam-data-seq] 184 | (doseq [exam-data exam-data-seq] 185 | (when (some? 
exam-data) 186 | (let [course-id (:course-id exam-data) 187 | year (:year exam-data) 188 | tables (select-keys exam-data [:exam :re-exam])] 189 | (save-exam tables course-id year))))) 190 | 191 | 192 | 193 | (defn get-statistics-data 194 | "Takes a map with the course-id, year and url it if it exists, 195 | otherwise it returns nil" 196 | [course] 197 | (let [course-id (:course-id course) 198 | ;combinations (generate-url-combinations course-id)] 199 | urls (grab-urls (query-stads course)) 200 | ;FIXME: year is no longer being passed to the combinations 201 | combinations (map (fn [url] {:url url :course-id course-id}) urls) 202 | ; merge with combinations from (generate-url-combinations course-id) 203 | combinations (concat combinations (generate-url-combinations course-id))] 204 | (println "[statistics] Trying to find exam for: " course-id) 205 | (loop [combinations combinations] 206 | (when-not (empty? combinations) 207 | (let [combination (first combinations) 208 | url (:url combination) 209 | html (try-scraping url) 210 | exam-data (try (parse-to-tables {:html html}) 211 | (catch Exception e 212 | (println "[statistics] Error parsing: " url) 213 | nil))] 214 | (if (nil? (:exam exam-data)) 215 | ; Sleep 200ms to be nice to the server 216 | (do (Thread/sleep 200) 217 | (recur (rest combinations))) 218 | (do 219 | (println "[statistics] Found exam for: " course-id) 220 | (merge combination exam-data)))))))) 221 | 222 | (def exam-data-seq (for [course course-infos] 223 | (get-statistics-data course))) 224 | 225 | 226 | 227 | (defn stats-watcher 228 | [] 229 | (io/make-parents (str out-dir "anything here")) 230 | (spit-all-to-json exam-data-seq) 231 | (Thread/sleep (* 1000 60 60 24)) 232 | (recur)) 233 | -------------------------------------------------------------------------------- /frontend/src/components/OverviewCard/OverviewCard.svelte: -------------------------------------------------------------------------------- 1 | 63 | 64 | 68 |
69 |
70 |
71 |

76 | {course.title} 77 |

78 |

79 | {course.id} - SCIENCE 80 |

81 |
82 | 83 | 84 | 85 | 88 | 91 | 92 | 93 | 123 | 124 | 136 | 137 | 138 |
86 | {denest_type_maps(course.degree).join(", ")} 87 | 89 | ECTS: {course.ects} 90 |
94 | Block(s): {coerce_blocks_to_int( 95 | denest_type_maps(course.block) 96 | ) 97 | .sort((a, b) => a - b) // Ensure numeric sorting 98 | .reduce((acc: number[][], curr, index, arr) => { 99 | // Convert consecutive numbers to ranges 100 | if ( 101 | index === 0 || 102 | curr - arr[index - 1] !== 1 103 | ) { 104 | acc.push([curr]); // Start a new range 105 | } else { 106 | acc[acc.length - 1][1] = curr; // Extend the current range 107 | } 108 | return acc; 109 | }, []) 110 | .map((range) => { 111 | if (range.length === 2) { 112 | return `${range[0]}-${range[1]}`; 113 | } else if (range[0] === 5) { 114 | // Map block 5 to "Summer" 115 | return "Summer"; 116 | } else { 117 | return `${range[0]}`; 118 | } 119 | }) 120 | // Format ranges or single values 121 | .join(", ")} 122 | 125 | Group(s): {denest_type_maps(course.schedule) 126 | // TODO: actually process the string schedules instead of calling them other 127 | .map((x) => 128 | typeof x === "object" ? "Other" : x 129 | ) 130 | .map((x) => 131 | x == "OutsideOfSchedule" ? "Other" : x 132 | ) 133 | .sort() 134 | .join(", ")} 135 |
139 |
140 | 141 |

142 | {@html course.summary.length > 200 143 | ? course.summary.substring(0, 200) + "..." 144 | : course.summary} 145 |

146 |
147 |
148 |
149 | {#each course.exam as exam} 150 |

151 | {exam.type == "ContinuousAssessment" 152 | ? "Continuous Assesment" 153 | : exam.type} 154 | {#if exam.duration} 155 | ({formatExamDuration(exam.duration)}) 156 | {/if} 157 |

158 | {/each} 159 |
160 | 161 | {#if course.statistics} 162 | 163 | 164 | 165 | 168 | 169 | 178 | 179 | 180 | 181 | 186 | 187 | 188 | 191 | 197 | 198 | 199 |
166 | Pass 170 | {course.statistics["pass-rate"] == null 171 | ? "N/A" 172 | : `${ 173 | Math.round( 174 | course.statistics["pass-rate"] * 10000 175 | ) / 100 176 | }%`} 177 |
Median 182 | {course.statistics.median == null 183 | ? "N/A" 184 | : course.statistics.median} 185 |
189 | Average 190 | 192 | {course.statistics.mean == null 193 | ? "N/A" 194 | : Math.round(course.statistics.mean * 100) / 195 | 100} 196 |
200 | {:else} 201 | 202 | 203 | 204 | 207 | 208 | 209 | 210 | 211 | 212 | 213 | 214 | 215 | 218 | 219 | 220 | 221 |
205 | PassN/A
MedianN/A
216 | Average 217 | N/A
222 | {/if} 223 |
224 | 225 |
226 | {#if course.language.filter((lang) => lang.name == "Danish").length > 0} 227 | 228 | {:else} 229 | 230 | {/if} 231 |
232 |
233 | -------------------------------------------------------------------------------- /backend/vector_store/src/db.rs: -------------------------------------------------------------------------------- 1 | use super::{Coordinator, Course}; 2 | use crate::embedding::{CoordinatorEmbedding, CourseEmbedding}; 3 | use crate::populate::Document; 4 | use anyhow::Result; 5 | use pgvector::Vector; 6 | use sqlx::postgres::{PgPool, PgPoolOptions}; 7 | use sqlx::{query, Row}; 8 | 9 | pub struct PostgresDB { 10 | pub pool: PgPool, 11 | } 12 | impl PostgresDB { 13 | pub async fn new(db_url: &str) -> Result { 14 | let pool = PgPoolOptions::new() 15 | .max_connections(3) 16 | .connect(db_url) 17 | .await?; 18 | Ok(Self { pool }) 19 | } 20 | 21 | /// Returns all the course ids that have outdated or non-existent embeddings 22 | /// This is computed by checking if the course modified timestamp is greater than the last modified 23 | /// timestamp of the title embedding or the content embedding 24 | pub async fn get_outdated_embedding_course_ids(&self) -> Result> { 25 | let result = query!( 26 | "SELECT c.id 27 | FROM course c 28 | LEFT JOIN title_embedding te ON c.id = te.course_id 29 | LEFT JOIN content_embedding ce ON c.id = ce.course_id 30 | WHERE 31 | c.last_modified > COALESCE(te.last_modified, to_timestamp(0)) OR 32 | c.last_modified > COALESCE(ce.last_modified, to_timestamp(0))" 33 | ) 34 | .fetch_all(&self.pool) 35 | .await?; 36 | 37 | let mut ids: Vec = Vec::new(); 38 | for row in result { 39 | ids.push(row.id.expect("id")) 40 | } 41 | 42 | Ok(ids) 43 | } 44 | 45 | /// Returns all the coordinators in the Vec of coordinator emails that do not have an embedding 46 | pub async fn get_missing_embedding_email_names(&self) -> Result> { 47 | // Due to a weird bug, this has to not be a macroo query 48 | let result = query( 49 | "SELECT coordinator.email, coordinator.full_name 50 | FROM coordinator 51 | LEFT JOIN name_embedding ne ON coordinator.email = ne.email 52 | WHERE ne.embedding 
IS NULL", 53 | ) 54 | .fetch_all(&self.pool) 55 | .await?; 56 | 57 | let mut coordinators = Vec::new(); 58 | for row in result { 59 | coordinators.push(Coordinator { 60 | email: row.try_get("email")?, 61 | name: row.try_get("full_name")?, 62 | }); 63 | } 64 | 65 | Ok(coordinators) 66 | } 67 | 68 | /// Returns all the courses in the Vec of course ids 69 | pub async fn get_courses_by_ids(&self, ids: &[String]) -> Result> { 70 | let mut courses = Vec::new(); 71 | 72 | let result = query!( 73 | "SELECT id, title, content FROM course WHERE id = ANY($1)", 74 | &ids 75 | ) 76 | .fetch_all(&self.pool) 77 | .await?; 78 | 79 | for row in result { 80 | let course = Course { 81 | id: row.id, 82 | title: row.title, 83 | content: row.content, 84 | }; 85 | courses.push(course); 86 | } 87 | 88 | Ok(courses) 89 | } 90 | 91 | /// Inserts the document into the database 92 | /// If the document already exists, it updates the title, content, and last_modified timestamp 93 | /// This is used by populate.rs but is not strictly required 94 | /// for the search functionality 95 | /// TODO: all insertion functionality should be moved out of this service 96 | pub async fn upsert_document(&self, document: &Document) -> Result<()> { 97 | // start by checking if the Document is the same as the one in the database 98 | // if it is, do nothing 99 | let result = query!( 100 | "SELECT title, content FROM course WHERE id = $1", 101 | document.info.id 102 | ) 103 | .fetch_optional(&self.pool) 104 | .await?; 105 | 106 | if let Some(row) = result { 107 | if row.title == document.title && row.content == document.description.content { 108 | return Ok(()); 109 | } 110 | } 111 | 112 | let mut tx = self.pool.begin().await?; 113 | 114 | query!( 115 | "INSERT INTO course (id, title, content) VALUES ($1, $2, $3) 116 | ON CONFLICT(id) DO UPDATE SET title = $2, content = $3, last_modified = CURRENT_TIMESTAMP", 117 | document.info.id, 118 | document.title, 119 | document.description.content 120 | ).execute(&mut 
*tx).await?; 121 | 122 | // A coordinator may have been removed, so we need to delete all coordinators for this course 123 | query!( 124 | "DELETE FROM course_coordinator WHERE course_id = $1", 125 | document.info.id 126 | ) 127 | .execute(&mut *tx) 128 | .await?; 129 | 130 | // no conflict, if the coordinator exists do nothing 131 | for coordinator in document.logistics.coordinators.iter() { 132 | query!( 133 | "INSERT INTO coordinator (email, full_name) VALUES ($1, $2) 134 | ON CONFLICT(email) DO NOTHING", 135 | coordinator.email, 136 | coordinator.name 137 | ) 138 | .execute(&mut *tx) 139 | .await?; 140 | 141 | query!( 142 | "INSERT INTO course_coordinator (course_id, email) VALUES ($1, $2)", 143 | document.info.id, 144 | coordinator.email 145 | ) 146 | .execute(&mut *tx) 147 | .await?; 148 | } 149 | 150 | tx.commit().await?; 151 | Ok(()) 152 | } 153 | 154 | /// Inserts the coordinator embedding into the database 155 | /// If the coordinator already exists, it does nothing, 156 | /// this is because we assume the names of the coordinators are immutable 157 | pub async fn insert_coordinator_embedding( 158 | &self, 159 | coordinator: CoordinatorEmbedding, 160 | ) -> Result<()> { 161 | query( 162 | "INSERT INTO name_embedding (email, embedding) VALUES ($1, $2) 163 | ON CONFLICT(email) DO NOTHING", 164 | ) 165 | .bind(coordinator.email) 166 | .bind(Vector::from(coordinator.name.to_owned())) 167 | .execute(&self.pool) 168 | .await?; 169 | Ok(()) 170 | } 171 | 172 | /// Inserts the course embedding into the database 173 | /// If the course already exists, it updates the embedding and the last_modified timestamp 174 | pub async fn insert_course_embedding(&self, course_embedding: CourseEmbedding) -> Result<()> { 175 | let mut tx = self.pool.begin().await?; 176 | query( 177 | "INSERT INTO title_embedding (course_id, embedding) VALUES ($1, $2) 178 | ON CONFLICT(course_id) DO UPDATE SET embedding = $2, last_modified = CURRENT_TIMESTAMP", 179 | ) 180 | 
.bind(&course_embedding.id) 181 | .bind(Vector::from(course_embedding.title.to_owned())) 182 | .execute(&mut *tx) 183 | .await?; 184 | 185 | query( 186 | "INSERT INTO content_embedding (course_id, embedding) VALUES ($1, $2) 187 | ON CONFLICT(course_id) DO UPDATE SET embedding = $2, last_modified = CURRENT_TIMESTAMP", 188 | ) 189 | .bind(course_embedding.id) 190 | .bind(Vector::from(course_embedding.content.to_owned())) 191 | .execute(&mut *tx) 192 | .await?; 193 | 194 | tx.commit().await?; 195 | Ok(()) 196 | } 197 | 198 | /// Returns the most relevant course ids based on the query embedding 199 | /// The title embedding is the title for that course 200 | /// The content embedding is the content for that course 201 | /// The coordinator embedding for each course is the most relevant coordinator for that course, 202 | /// if the coordinator's distance is greater than 0.8, it is clipped to 0.9, if it is less then it is halved 203 | /// to give it more importance in the total distance 204 | /// The relevance is then computed as the sum of the distances between the query embedding and the 205 | /// title embedding, content embedding, and coordinator embedding 206 | /// and is returned in ascending order (lower is better) 207 | pub async fn get_most_relevant_course_ids( 208 | &self, 209 | query_embedding: &[f32], 210 | ) -> Result> { 211 | let result = query(" 212 | WITH 213 | title_search AS ( 214 | SELECT 215 | course_id, embedding <-> $1 AS distance 216 | FROM 217 | title_embedding 218 | ), 219 | 220 | content_search AS ( 221 | SELECT 222 | course_id, embedding <-> $1 AS distance 223 | FROM 224 | content_embedding 225 | ), 226 | 227 | coordinator_search AS ( 228 | SELECT 229 | course_id, MIN( 230 | CASE 231 | WHEN embedding <-> $1 > 0.8 THEN 0.9 232 | ELSE (embedding <-> $1) / 2 233 | END) AS distance 234 | FROM 235 | course_coordinator 236 | INNER JOIN 237 | name_embedding 238 | ON 239 | course_coordinator.email = name_embedding.email 240 | GROUP BY course_id), 241 | 
242 | 243 | 244 | combined_search AS ( 245 | SELECT 246 | course.id, 247 | title_search.distance + content_search.distance + coordinator_search.distance AS total_distance 248 | FROM 249 | title_search 250 | INNER JOIN 251 | content_search ON title_search.course_id = content_search.course_id 252 | INNER JOIN 253 | coordinator_search ON title_search.course_id = coordinator_search.course_id 254 | INNER JOIN 255 | course ON title_search.course_id = course.id 256 | ), 257 | 258 | ranked_courses AS ( 259 | SELECT 260 | id, 261 | total_distance, 262 | ROW_NUMBER() OVER (PARTITION BY id ORDER BY total_distance) AS rn 263 | FROM 264 | combined_search 265 | ) 266 | 267 | SELECT 268 | id 269 | FROM 270 | ranked_courses 271 | WHERE 272 | rn = 1 273 | ORDER BY 274 | total_distance; 275 | ") 276 | .bind(Vector::from(query_embedding.to_owned())) 277 | .fetch_all(&self.pool) 278 | .await?; 279 | let mut ids: Vec = Vec::new(); 280 | for row in result { 281 | ids.push(row.try_get("id")?); 282 | } 283 | 284 | Ok(ids) 285 | } 286 | } 287 | -------------------------------------------------------------------------------- /frontend/src/routes/course/[courseId]/+page.svelte: -------------------------------------------------------------------------------- 1 | 118 | 119 | 120 | {title} 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | {#if loading} 145 | 146 | {:else} 147 | 151 | Back 152 | 153 |
154 |
155 |

161 | {course.title} 162 |

163 |

{course.id} - SCIENCE

164 |
165 | 166 |
167 |
168 | {#if course["statistics"] != null && statistics != null} 169 |

170 | Passed: {Math.round(statistics["pass-rate"] * 100)}%, 171 | Average grade: {Math.round(statistics["mean"] * 100) / 172 | 100}, Median grade: {statistics["median"]} 173 |

174 | 175 | {/if} 176 |
177 |
178 |

Description

179 | 180 | {@html content} 181 | {@html learning_outcome} 182 | {#if course["recommended-qualifications"] != null} 183 |

Recommended qualifications

184 | {@html course["recommended-qualifications"]} 185 | {/if} 186 |
187 |
188 | 189 | {#each course.coordinator as emp} 190 |
191 |

{emp.name}

192 |
193 |

{emp.email}

194 | {/each} 195 |
196 | 197 | {#each course.exam as exam} 198 |

199 | {separate_capitals_letters(exam.type)} 200 | {#if exam.duration} 201 | - ({formatExamDuration(exam.duration)}) 202 | {/if} 203 |

204 | {/each} 205 |
206 | 207 |

208 | Level: {denest_type_maps(course.degree).join("\n")} 209 |

210 |

ECTS: {course.ects}

211 | 212 |

213 | Block(s): {coerce_blocks_to_int( 214 | denest_type_maps(course.block) 215 | ) 216 | .sort() 217 | .join(", ")} 218 |

219 |

220 | Group(s): {denest_type_maps(course.schedule) 221 | .sort() 222 | .join(", ")} 223 |

224 | 225 |

226 | 227 | 231 | Go to official page 232 | 233 |
234 | 235 |
    236 | {#each course.department as dep} 237 |
  • 238 | {separate_capitals_letters(dep.name)} 239 |
  • 240 | {/each} 241 |
242 |
243 | 244 | 245 | 246 | {#each course.workload as wl} 247 | 248 | 251 | 252 | 253 | {/each} 254 | 255 |
249 | {separate_capitals_letters(wl.type)}{wl.hours}h
256 |

Total: {totalHours}h

257 |
258 |
259 |
260 | 261 |
262 |
263 | {/if} 264 | -------------------------------------------------------------------------------- /backend/db-manager/src/db_manager/db.clj: -------------------------------------------------------------------------------- 1 | (ns db-manager.db 2 | (:require [datascript.core :as d] 3 | [org.httpkit.client :as http] 4 | [clojure.data.json :as json] 5 | [clojure.walk :refer [postwalk]])) 6 | 7 | (def many-ref {:db/valueType :db.type/ref 8 | :db/cardinality :db.cardinality/many}) 9 | (def one-ref {:db/valueType :db.type/ref 10 | :db/cardinality :db.cardinality/one}) 11 | (def unique {:db/unique :db.unique/identity}) 12 | 13 | (defn component [schema] 14 | (assoc schema :db/isComponent true)) 15 | 16 | (def schema 17 | {:course/id unique 18 | :course/title {} 19 | :course/ects {} 20 | 21 | :course/block many-ref 22 | 23 | :course/schedule many-ref 24 | 25 | :course/language many-ref 26 | :course/duration {:db/cardinality :db.cardinality/one} 27 | :course/degree many-ref 28 | :course/capacity {:db/cardinality :db.cardinality/one} 29 | :course/department many-ref 30 | :course/faculty one-ref 31 | :course/coordinator many-ref 32 | 33 | :course/workload (component many-ref) 34 | 35 | :course/exam (component many-ref) 36 | :course/content {} 37 | :course/learning-outcome {} 38 | :course/recommended-qualifications {} 39 | :course/statistics (component one-ref) 40 | 41 | :schedule/type unique 42 | :block/type unique 43 | :faculty/name unique 44 | :department/name unique 45 | :degree/type unique 46 | :language/name unique 47 | :employee/email unique 48 | :employee/name {} 49 | :statistics/mean {:db/cardinality :db.cardinality/one} 50 | :statistics/median {:db/cardinality :db.cardinality/one} 51 | :statistics/pass-rate {:db/cardinality :db.cardinality/one}}) 52 | 53 | (defn convert-coordinator 54 | "Convert a coordinator map from rust parser to a datascript map" 55 | [coordinator] 56 | (let [name (get coordinator "name") 57 | email (get coordinator "email")] 58 | 
{:employee/name name
 :employee/email email}))

(defn convert-workload
  "Convert a workload map from the rust parser (string keys
  \"workload_type\" and \"hours\") to a datascript map."
  [workload]
  (let [type (get workload "workload_type")
        hours (get workload "hours")]
    {:workload/type type
     :workload/hours hours}))

(defn convert-exam [exam]
  ; this can either be a string or a map, if its a string then it has no duration
  (if (string? exam)
    {:exam/type exam}
    ; the key is the exam type, the value is the duration
    ; ensure that the map is exactly 1 element
    (if (= 1 (count exam))
      (let [[exam-type duration] (first exam)]
        {:exam/type exam-type
         :exam/duration duration})
      (throw (Exception. "Exam map has more than 1 element, this should be impossible")))))

(defn remove-nils
  "As hinted by the name, it traverses the entire map and removes all fields with nils
  This is necessary because the rust parser returns a lot of nils, and datascript does not like nils
  NOTE(review): `(when v ...)` is a truthiness test, so entries whose value is
  `false` are dropped along with nil ones — confirm no boolean field relies on this.
  Snippet from https://stackoverflow.com/questions/3937661/remove-nil-values-from-a-map"
  [m]
  (let [f (fn [[k v]] (when v [k v]))]
    (postwalk (fn [x] (if (map? x) (into {} (map f x)) x)) m)))

;; Recursively strip datascript's internal :db/id key from every map in `coll`
;; before query results are handed back to API consumers.
(defn remove-db-ids
  [coll]
  (postwalk (fn [x] (if (map?
x) (dissoc x :db/id) x)) coll))

(defn course-to-transaction
  "Build one datascript transaction map for a course from the raw,
  string-keyed JSON map emitted by the rust parser."
  [course-map]
  (let [id (get-in course-map ["info" "id"])
        title (get course-map "title")
        ects (get-in course-map ["info" "ects"])
        blocks (get-in course-map ["info" "block"])
        schedules (get-in course-map ["info" "schedule"])
        languages (get-in course-map ["info" "language"])
        duration (get-in course-map ["info" "duration"])
        degrees (get-in course-map ["info" "degree"])
        capacity (get-in course-map ["info" "capacity"])
        departments (get-in course-map ["logistics" "departments"])
        faculty (get-in course-map ["logistics" "faculty"])
        coordinators (map convert-coordinator (get-in course-map ["logistics" "coordinators"]))
        workloads (map convert-workload (get course-map "workloads"))
        exams (map convert-exam (get course-map "exams"))
        content (get-in course-map ["description" "content"])
        learning-outcome (get-in course-map ["description" "learning_outcome"])
        recommended-qualifications (get-in course-map ["description" "recommended_qualifications"])
        summary (get-in course-map ["description" "summary"])]
    ; FIXME: Why is this here
    (when (empty? departments)
      (println "Course " title " has no departments"))
    ;; Nested entity maps ({:block/type ...} etc.) are upserted by datascript
    ;; through the :db.unique/identity attributes declared in `schema`.
    {:course/id id
     :course/title title
     :course/ects ects
     :course/block (mapv #(hash-map :block/type %) blocks)
     :course/schedule (mapv #(hash-map :schedule/type %) schedules)
     :course/language (mapv #(hash-map :language/name %) languages)
     :course/duration duration
     :course/degree (mapv #(hash-map :degree/type %) degrees)
     :course/capacity capacity
     :course/department (mapv #(hash-map :department/name %) departments)
     :course/faculty (hash-map :faculty/name faculty)
     :course/coordinator coordinators
     :course/workload workloads
     :course/exam exams
     :course/content content
     :course/learning-outcome learning-outcome
     ; datascript cannot store nil, so coerce a missing value to ""
     :course/recommended-qualifications (if (nil? recommended-qualifications) "" recommended-qualifications)
     :course/summary summary}))

(defn courses-to-transactions
  "Map `course-to-transaction` over a collection of raw course maps."
  [courses]
  (map course-to-transaction courses))

(defn get-course-ids
  "Return a vector of every :course/id currently in the database."
  [conn]
  (let [course-ids (d/q '[:find ?id
                          :where
                          [?e :course/id ?id]]
                        @conn)]
    ; this is a vector of vectors, we want a vector of strings
    (mapv first course-ids)))

(defn get-course-by-id
  "Find all the detailed information about a course by its id"
  [conn course-id]
  (let [course (d/pull @conn '[* {:course/schedule [*]
                                  :course/exam [*]
                                  :course/degree [*]
                                  :course/block [*]
                                  :course/faculty [*]
                                  :course/department [*]
                                  :course/coordinator [*]
                                  :course/workload [*]
                                  :course/language [*]
                                  :course/statistics [*]}]
                       [:course/id course-id])]
    ; remove summary since we already bring it along from content
    (remove-db-ids (dissoc course :course/summary))))

; denest a vector of vectors
(defn denest [v]
  (mapv first v))

(defn search-vector-store
  "Query the vector-store search service and return the decoded JSON body
  (a ranked list of course ids); throws when the service does not answer 200."
  [query]
  ; send http request to localhost:4000/search
| (let [response @(http/get "http://vectorstore:4000/search" {:query-params {:query query}})] 166 | (if (= (:status response) 200) 167 | (let [body (:body response)] 168 | (json/read-str body)) 169 | (do 170 | (println response) 171 | (throw (Exception. "Search request failed, the vectorstore is down or unhealthy")))))) 172 | 173 | (defn query-course-ids [conn predicate-map] 174 | (let [blocks (get predicate-map :blocks) 175 | schedules (get predicate-map :schedules) 176 | exams (get predicate-map :exams) 177 | degrees (get predicate-map :degrees) 178 | departments (get predicate-map :departments) 179 | languages (get predicate-map :languages) 180 | search (get predicate-map :search) 181 | courses (denest (d/q (concat '[:find ?course-id :in $ 182 | :where 183 | [?e :course/block ?block] 184 | [?e :course/id ?course-id] 185 | [?e :course/schedule ?schedule] 186 | [?e :course/exam ?exam] 187 | [?e :course/degree ?degree] 188 | [?e :course/department ?department] 189 | [?e :course/language ?language]] 190 | (if (empty? blocks) 191 | [] 192 | (list (cons 'or (mapv (fn [block] (vector '?block ':block/type block)) blocks)))) 193 | 194 | (if (empty? schedules) 195 | [] 196 | (list (cons 'or (mapv (fn [schedule] (vector '?schedule ':schedule/type schedule)) schedules)))) 197 | 198 | (if (empty? exams) 199 | [] 200 | (list (cons 'or (mapv (fn [exam] (vector '?exam ':exam/type exam)) exams)))) 201 | 202 | (if (empty? degrees) 203 | [] 204 | (list (cons 'or (mapv (fn [degree] (vector '?degree ':degree/type degree)) degrees)))) 205 | 206 | (if (empty? departments) 207 | [] 208 | (list (cons 'or (mapv (fn [department] (vector '?department ':department/name department)) departments)))) 209 | (if (empty? languages) 210 | [] 211 | (list (cons 'or (mapv (fn [language] (vector '?language ':language/name language)) languages))))) 212 | @conn))] 213 | (if (empty? 
search) 214 | courses 215 | ; we get a list of IDs from the search vector store, we need to find all the courses in 216 | ; the returned courses which are in the vector store list whilst preserving the order 217 | (let [search-result (search-vector-store search)] 218 | (if (nil? search-result) 219 | courses 220 | ; perform an intersection of the two lists, but preserve the order of the first list 221 | (filter #(contains? (set courses) %) search-result)))))) 222 | 223 | (defn get-overviews-from-ids [conn ids] 224 | (d/pull-many @conn '[:course/id 225 | :course/title 226 | :course/ects 227 | :course/summary 228 | {:course/schedule [*] 229 | :course/block [*] 230 | :course/exam [*] 231 | :course/degree [*] 232 | :course/language [*] 233 | :course/statistics [:statistics/mean 234 | :statistics/median 235 | :statistics/pass-rate]}] 236 | (mapv #(vector :course/id %) ids))) 237 | 238 | (defn get-courses [conn predicate-map] 239 | (let [course-ids (query-course-ids conn predicate-map)] 240 | (map remove-db-ids (get-overviews-from-ids conn course-ids)))) 241 | -------------------------------------------------------------------------------- /backend/rust_parser/src/main.rs: -------------------------------------------------------------------------------- 1 | use std::env; 2 | 3 | use std::time; 4 | pub mod parser; 5 | 6 | //const DEFAULT_DATA_DIR: &str = "../../data"; 7 | //const HTMLS_DIR: &str = "../../data/pages"; 8 | //const TEST_DIR: &str = "./test_data"; 9 | //const TEST_HTMLS_DIR: &str = "./test_data/pages"; 10 | //const JSON_DIR: &str = "../../data/new_json"; 11 | 12 | // make a function that takes a path and returns the number of fails and the total number of courses 13 | fn count_fails(htmls_dir: &str, json_dir: &str) -> (usize, usize) { 14 | let mut fails = 0; 15 | let mut passes = 0; 16 | let dir = std::fs::read_dir(htmls_dir).unwrap(); 17 | for entry in dir { 18 | let entry = entry.unwrap(); 19 | // read the string from the file 20 | let html = 
std::fs::read_to_string(entry.path()).unwrap(); 21 | // parse the string 22 | let course = parser::parse_course(&html); 23 | // if the error cause (this is an anyhow context) contains , then we ignore it and continue 24 | match course { 25 | Ok(c) => { 26 | // emit json to file 27 | let json = serde_json::to_string(&c).unwrap(); 28 | let path = format!("{}/{}.json", json_dir, c.info.id); 29 | std::fs::write(path, json).unwrap(); 30 | passes += 1; 31 | } 32 | 33 | Err(e) => { 34 | // if any of the causes contain , then we ignore it and continue 35 | if e.chain().any(|c| c.to_string().contains("")) { 36 | continue; 37 | } else { 38 | fails += 1; 39 | println!("Error: {e:?}\n\n"); 40 | } 41 | } 42 | } 43 | } 44 | (fails, passes) 45 | } 46 | 47 | fn main() { 48 | let args: Vec = env::args().collect(); 49 | let timer = time::Instant::now(); 50 | let html_dir = &args[1]; 51 | let json_dir = &args[2]; 52 | 53 | // print all files in the html directory 54 | let _dir = std::fs::read_dir(html_dir).unwrap(); 55 | println!("fails and total: {:?}", count_fails(html_dir, json_dir)); 56 | 57 | println!("Time elapsed: {:?}", timer.elapsed()); 58 | } 59 | 60 | #[cfg(test)] 61 | mod tests { 62 | use super::*; 63 | use pretty_assertions::assert_eq; 64 | const TEST_HTMLS_DIR: &str = "./test_data/pages"; 65 | use parser::{ 66 | Coordinator, Course, Department, Description, Exam, Faculty, LogisticInformation, Workload, 67 | WorkloadType, 68 | }; 69 | 70 | // check that there are files in PAGES_DIR 71 | #[test] 72 | fn test_pages_dir() { 73 | let dir = std::fs::read_dir(TEST_HTMLS_DIR).unwrap(); 74 | assert!(dir.count() > 0); 75 | } 76 | 77 | #[test] 78 | fn test_lsls10061_u() { 79 | let html = std::fs::read_to_string(format!("{TEST_HTMLS_DIR}/LSLS10061U.html")).unwrap(); 80 | let course = parser::parse_course(&html); 81 | let expected_course = Course { 82 | title: "International Naturforvaltning".to_string(), 83 | info: parser::CourseInformation::new( 84 | "LSLS10061U".to_string(), 85 | 
7.5, 86 | vec![parser::Block::Two], 87 | vec![parser::Schedule::B], 88 | vec![parser::Language::Danish], 89 | parser::Duration::One, 90 | vec![parser::Degree::Bachelor], 91 | parser::Capacity(Some(70)), 92 | ), 93 | description: Description { 94 | content: "

Skov og naturressourcer globalt:

Globale klima- og vegetationszoner og deres økologi

Verdens naturressourcer, skove, nationalparker og\nnaturområder

Naturbeskyttelse og bevarelsesprogrammer

Globale skovopgørelser, fakta og trends, klassifikationer\u{a0}-\nog problematikker

Udfordringer i forhold til en bæredygtig anvendelse af\nnaturressourcer globalt

\u{a0}

International forvaltning:

International skov og naturressourceforvaltning - politisk,\ninstitutionelt og økonomisk

- Skovpolitik og regeringsførelse

- Internationale konventioner

- Regional EU politik og regelsæt\u{a0}

International skov og naturressourceforvaltning - decentrale\nsystemer\u{a0}

- Agroforestry

- Samfundsbaseret naturressourceforvaltning, indfødte folk og\nviden

Naturressourcebaserede konflikter

Klimaændringer ift. skov og natur

\u{a0}

Produktion, markedet og handel:

International skov og naturressourceforvaltning -\nkommercielt

- Plantage-\u{a0} koncessionsskovbrug

- Dyrkningssystemer og problematikker, optimering og\ngenbevarelse

- International handel med træprodukter \u{a0}

- Ulovlig hugst og handel

- Global og EU markedsbaseret regulering (skovcertificering,\nlegalitetsverificering, FLEGT)

".to_string(), 95 | learning_outcome: "

Kursets overordnede formål er at give den enkelte studerende\nviden om de vilkår, der danner rammerne for forvaltningen af skov\nog naturressourcer verden over. Det er også at give en\ngrundlæggende forståelse for hvordan de internationale rammer og\nvilkår har betydning for den måde, som skov og naturressourcer\nforvaltes i Danmark.



Viden:

- Kendskab til vækstvilkår og vegetationstyper globalt

- Kendskab til direkte og underliggende årsager til afskovning\nog over-udnyttelse af ressourcer

- Indsigt i koncessionsskovbrug, plantagedrift og\nagro-forestry

- Kendskab til internationale konventioner og EU lovgivning på\nskov og naturressourceområdet

- Kendsakbs til bevaringsklassifikationer og naturbeskyttelse i\npraksis

- Kendskab til de mest almindelige\u{a0}tømmertræarter

- Kendskab til det internationale markeds betydning for\nforvaltningen af skov- og naturressourcer

- Indsigt i mekanismerne der driver ulovlig hugst og handel med\ntræ

- Indsigt i markedsbaseret regulering

- Kendskab til de mest almindelige certificeringssystemer

- Indsigt i betydningen af\u{a0}klimaforandringer\u{a0}for den\nmåde skov og naturressourcer\u{a0} forvaltes\u{a0}i fremtiden

\u{a0}

Færdigheder:

- Redegøre for hvordan de naturgivne vækstvilkår i forskellige\nvegetationszoner har betydning for den måde som skov – og\nnaturressourcer i disse zoner bør forvaltes

- Redegøre for og diskutere hvordan problemer over-udnyttelse af\nskov og naturressourcer kan afhjælpes\n



\n- Redegøre for hvordan internationale konventioner og EU lovgivning\nhar betydning for forvaltningen af skov og naturressourcer\nlokalt

- Analysere globale markedstrends og forklare hvordan disse har\nbetydning for forvaltningen af skov- og naturressourcer

- Redegøre for de grundlæggende principper bag decentraliseret\nressourceforvaltning

- Identificere de udfordringer, som forvaltningen af\nfællesressourcer kan medføre

- Forklare hvordan markedsbaseret regulering af skov og\nnaturressourceforvaltning fungerer

- Redegøre for de vigtigste drivere bag ulovlig hugst og handel\nsamt hvordan det kan bekæmpes

- Opstille praktiske løsninger for hvordan negative konsekvenser\naf klimaforandringer kan afhjælpes

- Kan argumentere for og imod decentraliseret skov- og\nnaturressourceforvaltning



Kompetencer:

- Arbejde med skov og naturressourceforvaltning under de\nrammevilkår som internationale konventioner og EU lovgivning\ngiver

- Bidrage til at afhjælpe degradering og over-udnyttelse af skov\nog naturressourcer

- Indgå i samarbejder omkring forvaltningen af skov og\nnaturressource i forskellige klimazoner - både nationalt og\ninternationalt

- Tage informerede driftsbeslutninger under hensyntagen til\nnationale og internationale markedstrends

- Arbejde med certificering og legalitetsveriticering

- Integrere klimatilpasningshensyn i forvaltningen af skov og\nnaturressourcer

".to_string(), 96 | recommended_qualifications: Some("Basal forståelse for\nnaturforvaltning og/eller samfundsvidenskab.\n

\nSprogkundskaber til at kunne læse og forstå engelsk faglitteratur.\n

\nTilnærmelsesvis alt litteratur er på engelsk.".to_string()), 97 | summary: "Skov og naturressourcer globalt:Globale klima- og vegetationszoner og deres økologiVerdens naturressourcer, skove, nationalparker og\nnaturområderNaturbeskyttelse og bevarelsesprogrammerGlobale skovopgørelser, fakta og trends, klassifikationer\u{a0}-\nog problematikkerUdfordringer i forhold til en bæredygt".to_string(), 98 | }, 99 | exams: vec![Exam::Oral(Some(30))], 100 | /* 101 | * > workloads: [ 102 | > Workload { 103 | > workload_type: Lectures, 104 | > hours: 98.0, 105 | > }, 106 | > Workload { 107 | > workload_type: Preparation, 108 | > hours: 97.0, 109 | > }, 110 | > Workload { 111 | > workload_type: ProjectWork, 112 | > hours: 8.0, 113 | > }, 114 | > Workload { 115 | > workload_type: Exam, 116 | > hours: 1.0, 117 | > }, 118 | > ], 119 | */ 120 | workloads: vec![ 121 | Workload::new(WorkloadType::Lectures, 98.0), 122 | Workload::new(WorkloadType::Preparation, 97.0), 123 | Workload::new(WorkloadType::ProjectWork, 8.0), 124 | Workload::new(WorkloadType::Exam, 1.0), 125 | ], 126 | 127 | logistics: LogisticInformation::new( 128 | vec![Department::GeosciencesAndNaturalResourceManagement], 129 | Faculty::Science, 130 | vec![Coordinator::new( 131 | "Kirsten Carlsen".into(), 132 | "bdk748@alumni.ku.dk".into(), 133 | )], 134 | ), 135 | }; 136 | assert_eq!(expected_course, course.unwrap()); 137 | } 138 | 139 | // We need to ignore the duration if the course is known to be a summer course. 
140 | #[ignore] 141 | #[test] 142 | fn test_nbik15000_u() { 143 | let html = std::fs::read_to_string(format!("{TEST_HTMLS_DIR}/NBIK15000U.html")).unwrap(); 144 | let course = parser::parse_course(&html); 145 | let expected_course = Course { 146 | title: "BAdvanced Plant Identification".to_string(), 147 | info: parser::CourseInformation::new( 148 | "NBIK15000U".to_string(), 149 | 7.5, 150 | vec![parser::Block::Summer], 151 | vec![parser::Schedule::B], // doesnt exist 152 | vec![parser::Language::English], 153 | parser::Duration::One, 154 | vec![parser::Degree::Master], 155 | parser::Capacity(Some(16)), 156 | ), 157 | description: Description { 158 | content: "".to_string(), 159 | learning_outcome: "".to_string(), 160 | recommended_qualifications: Some("".to_string()), 161 | summary: "".to_string(), 162 | }, 163 | exams: vec![Exam::Oral(Some(30))], 164 | workloads: Vec::new(), 165 | logistics: LogisticInformation::new(Vec::new(), Faculty::Science, Vec::new()), 166 | }; 167 | assert_eq!(expected_course, course.unwrap()); 168 | } 169 | } 170 | -------------------------------------------------------------------------------- /backend/rust_parser/src/parser.rs: -------------------------------------------------------------------------------- 1 | use anyhow::{bail, ensure, Context, Result}; 2 | use serde::Serialize; 3 | use tl::VDom; 4 | 5 | use crate::parser::course_information::parse_course_info; 6 | pub mod course_information; 7 | 8 | use crate::parser::exam_information::parse_course_exams; 9 | pub mod exam_information; 10 | 11 | use crate::parser::logistic_information::parse_logistic_info; 12 | pub mod logistic_information; 13 | 14 | use crate::parser::workload_information::parse_workloads; 15 | pub mod workload_information; 16 | 17 | use crate::parser::content_serialiser::grab_htmls; 18 | pub mod content_serialiser; 19 | 20 | #[derive(Debug, PartialEq, Serialize)] 21 | pub struct Course { 22 | pub title: String, 23 | pub info: CourseInformation, 24 | pub logistics: 
LogisticInformation, 25 | pub workloads: Vec, 26 | pub exams: Vec, 27 | pub description: Description, 28 | } 29 | 30 | #[derive(Debug)] 31 | pub enum CourseLanguage { 32 | English, 33 | Danish, 34 | } 35 | 36 | #[derive(Debug, PartialEq, Serialize)] 37 | pub struct CourseInformation { 38 | pub id: String, 39 | ects: f32, 40 | block: Vec, 41 | schedule: Vec, 42 | language: Vec, 43 | duration: Duration, 44 | degree: Vec, 45 | capacity: Capacity, 46 | } 47 | 48 | #[derive(Debug, PartialEq, Serialize)] 49 | pub enum Department { 50 | // Faculty of Science 51 | PlantAndEnvironmentalScience, 52 | Biology, 53 | ComputerScience, 54 | FoodAndResourceEconomics, 55 | FoodScience, 56 | GeosciencesAndNaturalResourceManagement, 57 | NutritionExerciseAndSports, 58 | Mathematics, 59 | ScienceEducation, 60 | // PlantAndEnvironmentalSciences, this never occurs as of end of 2023 61 | Chemistry, 62 | NielsBohrInstitute, 63 | NaturalHistoryMuseumOfDenmark, 64 | VeterinaryAndAnimalSciences, 65 | BiomedicalSciences, 66 | PublicHealth, 67 | DrugDesignAndPharmacology, 68 | CellularAndMolecularMedicine, 69 | Pharmacy, 70 | Globe, 71 | } 72 | impl Department { 73 | fn from_str(s: &str) -> Result { 74 | match s.replace('\n', " ").as_str() { 75 | "Department of Computer Science" | "Datalogisk Institut" => { 76 | Ok(Department::ComputerScience) 77 | } 78 | "Institut for Idræt og Ernæring" | "Department of Nutrition, Exercise and Sports" => { 79 | Ok(Department::NutritionExerciseAndSports) 80 | } 81 | "Statens Naturhistoriske Museum" | "The Natural History Museum of Denmark" => { 82 | Ok(Department::NaturalHistoryMuseumOfDenmark) 83 | } 84 | "Institut for Plante- og Miljøvidenskab" 85 | | "Department of Plant and Environmental Sciences" => { 86 | Ok(Department::PlantAndEnvironmentalScience) 87 | } 88 | "Institut for Matematiske Fag" | "Department of Mathematical Sciences" => { 89 | Ok(Department::Mathematics) 90 | } 91 | "Niels Bohr Institutet" | "The Niels Bohr Institute" => { 92 | 
Ok(Department::NielsBohrInstitute) 93 | } 94 | "Institut for Geovidenskab og Naturforvaltning" 95 | | "Department of Geoscience and Natural Resource Management" => { 96 | Ok(Department::GeosciencesAndNaturalResourceManagement) 97 | } 98 | "Institut for Naturfagenes Didaktik" | "Department of Science Education" => { 99 | Ok(Department::ScienceEducation) 100 | } 101 | "Institut for Fødevare- og Ressourceøkonomi" 102 | | "Department of Food and Resource Economics" => { 103 | Ok(Department::FoodAndResourceEconomics) 104 | } 105 | "Institut for Fødevarevidenskab" | "Department of Food Science" => { 106 | Ok(Department::FoodScience) 107 | } 108 | "Kemisk Institut" | "Department of Chemistry" => Ok(Department::Chemistry), 109 | "Biologisk Institut" | "Department of Biology" => Ok(Department::Biology), 110 | "Department of Veterinary and Animal Sciences" 111 | | "Institut for Veterinær- og Husdyrvidenskab (IVH)" => { 112 | Ok(Department::VeterinaryAndAnimalSciences) 113 | } 114 | "Department of Biomedical Sciences" => Ok(Department::BiomedicalSciences), 115 | "Department of Pharmacy" => Ok(Department::Pharmacy), 116 | "Institut for Lægemiddeldesign og Farmakologi" 117 | | "Department of Drug Design and Pharmacology" => { 118 | Ok(Department::DrugDesignAndPharmacology) 119 | } 120 | "Department of Cellular and Molecular Medicine" => { 121 | Ok(Department::CellularAndMolecularMedicine) 122 | } 123 | "Department of Public Health" => Ok(Department::PublicHealth), 124 | "Institut for Nordiske Studier og Sprogvidenskab" => { 125 | bail!("Nordic studies not supported ") 126 | } 127 | "GLOBE Institute" => Ok(Department::Globe), 128 | _ => bail!("Unknown department: {}", s), 129 | } 130 | } 131 | } 132 | 133 | #[derive(Debug, PartialEq, Serialize)] 134 | pub enum Faculty { 135 | Science, 136 | } 137 | 138 | #[derive(Debug, PartialEq, Serialize)] 139 | pub struct Coordinator { 140 | name: String, 141 | email: String, 142 | } 143 | impl Coordinator { 144 | pub fn new(name: String, 
email: String) -> Self { 145 | Self { name, email } 146 | } 147 | } 148 | 149 | #[derive(Debug, PartialEq, Serialize)] 150 | pub struct LogisticInformation { 151 | departments: Vec, 152 | faculty: Faculty, 153 | coordinators: Vec, 154 | } 155 | impl LogisticInformation { 156 | pub fn new( 157 | departments: Vec, 158 | faculty: Faculty, 159 | coordinators: Vec, 160 | ) -> Self { 161 | Self { 162 | departments, 163 | faculty, 164 | coordinators, 165 | } 166 | } 167 | } 168 | 169 | impl CourseInformation { 170 | #[allow(clippy::too_many_arguments)] 171 | pub fn new( 172 | id: String, 173 | ects: f32, 174 | block: Vec, 175 | schedule: Vec, 176 | language: Vec, 177 | duration: Duration, 178 | degree: Vec, 179 | capacity: Capacity, 180 | ) -> Self { 181 | Self { 182 | id, 183 | ects, 184 | block, 185 | schedule, 186 | language, 187 | duration, 188 | degree, 189 | capacity, 190 | } 191 | } 192 | } 193 | 194 | #[derive(Debug, PartialEq, Serialize)] 195 | pub enum Block { 196 | One, 197 | Two, 198 | Three, 199 | Four, 200 | Summer, 201 | Other(String), 202 | } 203 | 204 | #[derive(Debug, PartialEq, Serialize)] 205 | pub enum Schedule { 206 | A, 207 | B, 208 | C, 209 | D, 210 | OutsideOfSchedule, 211 | Other(String), 212 | } 213 | 214 | #[derive(Debug, PartialEq, Serialize)] 215 | pub enum Language { 216 | Danish, 217 | English, 218 | } 219 | 220 | #[derive(Debug, Eq, PartialEq, Serialize)] 221 | pub enum Duration { 222 | One, 223 | Two, 224 | DependsOnEcts, 225 | Custom(String), 226 | } 227 | 228 | #[derive(Debug, PartialEq, Eq, Ord, PartialOrd, Serialize)] 229 | pub enum Degree { 230 | Phd, 231 | Bachelor, 232 | Master, 233 | Propædeutik, 234 | } 235 | 236 | #[derive(Debug, PartialEq, Serialize)] 237 | pub struct Capacity(pub Option); 238 | 239 | #[derive(Debug, PartialEq, Serialize)] 240 | pub enum Exam { 241 | Oral(Option), 242 | Written(Option), 243 | Assignment(Option), 244 | ContinuousAssessment, 245 | Other, 246 | } 247 | 248 | #[derive(Debug, PartialEq, Serialize)] 
249 | pub enum WorkloadType { 250 | Exam, 251 | ELearning, 252 | Laboratory, 253 | StudyGroup, 254 | TheoryExercises, 255 | FieldWork, 256 | Preparation, 257 | ExamPreparation, 258 | Excursions, 259 | Lectures, 260 | PracticalExercises, 261 | ProjectWork, 262 | Exercises, 263 | Guidance, 264 | ClassInstruction, 265 | PracticalTraining, 266 | Seminar, 267 | } 268 | impl WorkloadType { 269 | fn from_str(s: &str) -> Result { 270 | match s { 271 | "Forelæsninger" | "Lectures" => Ok(WorkloadType::Lectures), 272 | "Forberedelse (anslået)" | "Preparation" => Ok(WorkloadType::Preparation), 273 | "Theory exercises" | "Teoretiske øvelser" => Ok(WorkloadType::TheoryExercises), 274 | "Project work" | "Projektarbejde" => Ok(WorkloadType::ProjectWork), 275 | "Øvelser" | "Exercises" => Ok(WorkloadType::Exercises), 276 | "Eksamen" | "Exam" => Ok(WorkloadType::Exam), 277 | "Eksamensforberedelse" | "Exam Preparation" => Ok(WorkloadType::ExamPreparation), 278 | "Holdundervisning" | "Class Instruction" => Ok(WorkloadType::ClassInstruction), 279 | "Practical exercises" | "Praktiske øvelser" => Ok(WorkloadType::PracticalExercises), 280 | "E-Learning" | "E-læring" => Ok(WorkloadType::ELearning), 281 | "Feltarbejde" | "Field Work" => Ok(WorkloadType::FieldWork), 282 | "Studiegrupper" | "Study Groups" => Ok(WorkloadType::StudyGroup), 283 | "Seminar" => Ok(WorkloadType::Seminar), 284 | "Ekskursioner" | "Excursions" => Ok(WorkloadType::Excursions), 285 | "Laboratorie" | "Laboratory" => Ok(WorkloadType::Laboratory), 286 | "Praktik" | "Practical Training" => Ok(WorkloadType::PracticalTraining), 287 | 288 | "Guidance" | "Vejledning" => Ok(WorkloadType::Guidance), 289 | _ => bail!("Unknown workload type: {}", s), 290 | } 291 | } 292 | } 293 | 294 | #[derive(Debug, PartialEq, Serialize)] 295 | pub struct Workload { 296 | workload_type: WorkloadType, 297 | hours: f32, 298 | } 299 | impl Workload { 300 | pub fn new(workload_type: WorkloadType, hours: f32) -> Self { 301 | Self { 302 | workload_type, 
303 | hours, 304 | } 305 | } 306 | } 307 | 308 | #[derive(Debug, PartialEq, Serialize)] 309 | pub struct Description { 310 | pub content: String, 311 | pub learning_outcome: String, 312 | pub recommended_qualifications: Option, 313 | pub summary: String, 314 | } 315 | 316 | pub fn parse_course(html: &str) -> Result { 317 | let dom = tl::parse(html, tl::ParserOptions::default())?; 318 | let content = dom.get_element_by_id("content"); 319 | let title = parse_title(&dom)?; 320 | 321 | ensure!( 322 | content.is_some(), 323 | "Unable to find content element, this should not happen" 324 | ); 325 | let info = parse_course_info(&dom).context(format!("Unable to parse course: {title}"))?; 326 | let logistic_info = parse_logistic_info(&dom).context(format!( 327 | "Unable to parse logistic information for course: {title}" 328 | ))?; 329 | 330 | let exam_info = parse_course_exams(&dom).context(format!( 331 | "Unable to parse exam information for course: {title}" 332 | ))?; 333 | 334 | let workload_info = parse_workloads(&dom).context(format!( 335 | "Unable to parse workload information for course: {title}" 336 | ))?; 337 | 338 | let html_info = grab_htmls(&dom).context(format!( 339 | "Unable to grab html information for course: {title}" 340 | ))?; 341 | 342 | Ok(Course { 343 | title, 344 | info, 345 | logistics: logistic_info, 346 | exams: exam_info, 347 | workloads: workload_info, 348 | description: html_info, 349 | }) 350 | } 351 | fn parse_title(dom: &VDom) -> Result { 352 | let title = dom 353 | .get_elements_by_class_name("courseTitle") 354 | .next() 355 | .context("Unable to find course title") 356 | .and_then(|elem| { 357 | elem.get(dom.parser()) 358 | .context("Unable to grab parser for the dom, this should not happen") 359 | .map(|tag| tag.inner_text(dom.parser())) 360 | }); 361 | 362 | let binding = title 363 | .unwrap_or_else(|_| "Error unwrapping html title".into()) 364 | .replace(['\u{a0}', '\n'], " "); 365 | 366 | // Then split them 367 | let res: Vec<&str> = 
binding.split_whitespace().collect(); 368 | 369 | // Return only the part of the title without the course code 370 | ensure!( 371 | res.len() >= 2, 372 | "Title does not conform to expected structure: " 373 | ); 374 | 375 | Ok(res[1..].join(" ")) 376 | } 377 | --------------------------------------------------------------------------------