├── .envrc ├── backend ├── vector_store │ ├── .envrc │ ├── .gitignore │ ├── build.rs │ ├── .sqlx │ │ ├── query-63012c5ed8ef1bd8c3c931294767c96a5eae22e960264ab1a6b4b9e957c3c20c.json │ │ ├── query-e1166c8c3bde787b1b07787d9a30ec58575597e8223d57d8d02e43944b7d5cae.json │ │ ├── query-65f1c255cf0c5df5476c730aea2a9448783ca002a83452a3f245a82b79371652.json │ │ ├── query-7060a1b2757a7c942deadc8b86ccc6ea26bbfb5e24db6dfbdea16ebb88c79319.json │ │ ├── query-1bac47c3b9f3a187ee440c83f2c22106f0bfdb8d7865eec2d49a0585da7b21ca.json │ │ ├── query-07ff355f2f2a14bf0a163f3cb6450f57baa285962f0e9fefe0c8d6a5084e7d0a.json │ │ └── query-b67fb1612df114fa58878cc079a5c912765be487fb0d98985d68e6f2dcf4f885.json │ ├── Dockerfile │ ├── Cargo.toml │ ├── README.md │ ├── migrations │ │ └── 0001_initial.sql │ └── src │ │ ├── populate.rs │ │ ├── embedding.rs │ │ ├── main.rs │ │ └── db.rs ├── db-manager │ ├── doc │ │ └── intro.md │ ├── src │ │ ├── statistics │ │ │ ├── .gitignore │ │ │ ├── utils.clj │ │ │ └── core.clj │ │ ├── db_manager │ │ │ ├── cache.clj │ │ │ ├── routes.clj │ │ │ ├── core.clj │ │ │ └── db.clj │ │ ├── exam_scraper │ │ │ └── core.clj │ │ └── course_scraper │ │ │ ├── upsert.clj │ │ │ └── watcher.clj │ ├── .gitignore │ ├── Dockerfile │ ├── test │ │ └── db_manager │ │ │ └── core_test.clj │ ├── README.md │ ├── CHANGELOG.md │ └── project.clj └── rust_parser │ ├── Dockerfile │ ├── Cargo.toml │ ├── src │ ├── parser │ │ ├── workload_information.rs │ │ ├── content_serialiser.rs │ │ ├── exam_information.rs │ │ └── logistic_information.rs │ ├── main.rs │ └── parser.rs │ └── Cargo.lock ├── frontend ├── .npmrc ├── static │ ├── robots.txt │ ├── favicon.ico │ └── assets │ │ ├── og-image.png │ │ └── preview.png ├── postcss.config.js ├── vite.config.ts ├── .gitignore ├── Dockerfile ├── src │ ├── components │ │ ├── Changelog │ │ │ ├── ChangelogButton.svelte │ │ │ ├── store.ts │ │ │ └── ChangelogModal.svelte │ │ ├── GradeGraph │ │ │ ├── sample.json │ │ │ └── GradeGraph.svelte │ │ ├── SideCard.svelte │ │ ├── 
CheckboxMenu.svelte │ │ ├── BigCheckbox.svelte │ │ ├── Footer │ │ │ └── Footer.svelte │ │ ├── Loader │ │ │ └── Loader.svelte │ │ └── OverviewCard │ │ │ └── OverviewCard.svelte │ ├── assets │ │ ├── Dk.svelte │ │ ├── CloseCross.svelte │ │ ├── Gb.svelte │ │ ├── MenuIcon.svelte │ │ ├── Github.svelte │ │ └── Facebook.svelte │ ├── app.d.ts │ ├── app.html │ ├── app.css │ ├── routes │ │ ├── +layout.svelte │ │ ├── sitemap.xml │ │ │ └── +server.ts │ │ └── course │ │ │ └── [courseId] │ │ │ ├── +page.server.ts │ │ │ └── +page.svelte │ ├── theme.ts │ ├── stores.ts │ └── course.ts ├── svelte.config.js ├── .prettierrc.json ├── eslint.config.js ├── tsconfig.json ├── README.md ├── package.json └── tailwind.config.js ├── .gitignore ├── assets └── showcase.gif ├── .gitattributes ├── .github └── workflows │ ├── docker-ci.yml │ ├── deploy.yml │ ├── rust-ci.yml │ └── frontend-ci.yml ├── .pre-commit-config.yaml ├── LICENSE ├── flake.nix ├── docker-compose.yml ├── CONTRIBUTING.md ├── README.md └── flake.lock /.envrc: -------------------------------------------------------------------------------- 1 | use flake 2 | -------------------------------------------------------------------------------- /backend/vector_store/.envrc: -------------------------------------------------------------------------------- 1 | use flake 2 | -------------------------------------------------------------------------------- /backend/vector_store/.gitignore: -------------------------------------------------------------------------------- 1 | .fastembed_cache 2 | -------------------------------------------------------------------------------- /frontend/.npmrc: -------------------------------------------------------------------------------- 1 | engine-strict=true 2 | resolution-mode=highest 3 | -------------------------------------------------------------------------------- /frontend/static/robots.txt: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Allow: / 3 | 
Disallow: /api 4 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | data/* 2 | .direnv 3 | .lsp 4 | .clj-kondo 5 | target 6 | log.txt 7 | -------------------------------------------------------------------------------- /assets/showcase.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joshniemela/KU-courses/HEAD/assets/showcase.gif -------------------------------------------------------------------------------- /frontend/static/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joshniemela/KU-courses/HEAD/frontend/static/favicon.ico -------------------------------------------------------------------------------- /frontend/static/assets/og-image.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joshniemela/KU-courses/HEAD/frontend/static/assets/og-image.png -------------------------------------------------------------------------------- /frontend/static/assets/preview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joshniemela/KU-courses/HEAD/frontend/static/assets/preview.png -------------------------------------------------------------------------------- /frontend/postcss.config.js: -------------------------------------------------------------------------------- 1 | export default { 2 | plugins: { 3 | tailwindcss: {}, 4 | autoprefixer: {}, 5 | }, 6 | }; 7 | -------------------------------------------------------------------------------- /backend/db-manager/doc/intro.md: -------------------------------------------------------------------------------- 1 | # Introduction to db-manager 2 | 3 | TODO: write [great 
documentation](http://jacobian.org/writing/what-to-write/) 4 | -------------------------------------------------------------------------------- /frontend/vite.config.ts: -------------------------------------------------------------------------------- 1 | import { sveltekit } from "@sveltejs/kit/vite"; 2 | import { defineConfig } from "vite"; 3 | 4 | export default defineConfig({ 5 | plugins: [sveltekit()], 6 | }); 7 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | frontend/src/assets/* linguist-vendored 2 | frontend/src/theme.js linguist-vendored 3 | backend/rust_parser/test_data/pages/* linguist-vendored 4 | backend/employed/* linguist-vendored 5 | -------------------------------------------------------------------------------- /frontend/.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | node_modules 3 | /build 4 | /.svelte-kit 5 | /package 6 | .env 7 | .env.* 8 | !.env.example 9 | vite.config.js.timestamp-* 10 | vite.config.ts.timestamp-* 11 | -------------------------------------------------------------------------------- /backend/vector_store/build.rs: -------------------------------------------------------------------------------- 1 | // generated by `sqlx migrate build-script` 2 | fn main() { 3 | // trigger recompilation when a new migration is added 4 | println!("cargo:rerun-if-changed=migrations"); 5 | } 6 | -------------------------------------------------------------------------------- /backend/db-manager/src/statistics/.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | /classes 3 | /checkouts 4 | profiles.clj 5 | pom.xml 6 | pom.xml.asc 7 | *.jar 8 | *.class 9 | /.lein-* 10 | /.nrepl-port 11 | /.prepl-port 12 | .hgignore 13 | .hg/ 14 | 
-------------------------------------------------------------------------------- /backend/db-manager/.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | /classes 3 | /checkouts 4 | profiles.clj 5 | pom.xml 6 | pom.xml.asc 7 | *.jar 8 | *.class 9 | /.lein-* 10 | /.nrepl-port 11 | /.prepl-port 12 | .hgignore 13 | .hg/ 14 | .calva 15 | .lsp 16 | .clj-kondo 17 | -------------------------------------------------------------------------------- /frontend/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM node:22.6.0-alpine3.19 AS build 2 | 3 | WORKDIR /app 4 | 5 | COPY package*.json ./ 6 | RUN npm install 7 | COPY . ./ 8 | RUN npm run build 9 | RUN npm prune --omit=dev 10 | 11 | EXPOSE 5000 12 | ENV HOST=0.0.0.0 13 | ENV PORT=5000 14 | CMD ["node", "./build/index.js"] 15 | 16 | -------------------------------------------------------------------------------- /frontend/src/components/Changelog/ChangelogButton.svelte: -------------------------------------------------------------------------------- 1 | 4 | 5 | 12 | -------------------------------------------------------------------------------- /frontend/src/assets/Dk.svelte: -------------------------------------------------------------------------------- 1 | 6 | 7 | 8 | 9 | 10 | -------------------------------------------------------------------------------- /frontend/src/app.d.ts: -------------------------------------------------------------------------------- 1 | // See https://kit.svelte.dev/docs/types#app 2 | // for information about these interfaces 3 | declare global { 4 | namespace App { 5 | // interface Error {} 6 | // interface Locals {} 7 | // interface PageData {} 8 | // interface Platform {} 9 | } 10 | } 11 | 12 | export {}; 13 | -------------------------------------------------------------------------------- /backend/rust_parser/Dockerfile: 
-------------------------------------------------------------------------------- 1 | FROM rust:slim 2 | 3 | ENV TARGET x86_64-unknown-linux-musl 4 | 5 | RUN rustup target add $TARGET 6 | 7 | COPY . /app 8 | 9 | WORKDIR /app 10 | 11 | RUN cargo build --release --locked --target "$TARGET" 12 | 13 | 14 | 15 | # run the target 16 | RUN chmod +x /app/target/x86_64-unknown-linux-musl/release/rust_parser 17 | -------------------------------------------------------------------------------- /frontend/svelte.config.js: -------------------------------------------------------------------------------- 1 | import adapter from "@sveltejs/adapter-node"; 2 | import { vitePreprocess } from "@sveltejs/vite-plugin-svelte"; 3 | 4 | /** @type {import('@sveltejs/kit').Config} */ 5 | const config = { 6 | preprocess: vitePreprocess(), 7 | 8 | kit: { 9 | adapter: adapter(), 10 | }, 11 | }; 12 | export default config; 13 | -------------------------------------------------------------------------------- /frontend/.prettierrc.json: -------------------------------------------------------------------------------- 1 | { 2 | "trailingComma": "es5", 3 | "tabWidth": 4, 4 | "arrowParens": "always", 5 | "bracketSpacing": true, 6 | "endOfLine": "lf", 7 | "printWidth": 80, 8 | "singleQuote": false, 9 | "svelteStrictMode": true, 10 | "svelteAllowShorthand": false, 11 | "plugins": ["prettier-plugin-svelte"] 12 | } 13 | -------------------------------------------------------------------------------- /frontend/src/app.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | %sveltekit.head% 7 | 8 | 9 |
%sveltekit.body%
10 | 11 | 12 | -------------------------------------------------------------------------------- /frontend/src/components/GradeGraph/sample.json: -------------------------------------------------------------------------------- 1 | [ 2 | { "grade": "12", "count": 9 }, 3 | { "grade": "10", "count": 15 }, 4 | { "grade": "7", "count": 7 }, 5 | { "grade": "4", "count": 4 }, 6 | { "grade": "02", "count": 3 }, 7 | { "grade": "00", "count": 2 }, 8 | { "grade": "-3", "count": 0 }, 9 | { "grade": "Ej m\u00f8dt", "count": 9 } 10 | ] 11 | -------------------------------------------------------------------------------- /.github/workflows/docker-ci.yml: -------------------------------------------------------------------------------- 1 | name: Docker CI 2 | 3 | on: 4 | push: 5 | paths: 6 | - ".github/workflows/docker-ci.yml" 7 | pull_request: 8 | branches: 9 | - main 10 | 11 | jobs: 12 | docker: 13 | name: Docker compose up 14 | runs-on: ubuntu-latest 15 | steps: 16 | - uses: actions/checkout@v4 17 | - name: Docker compose up 18 | run: docker compose up -d 19 | -------------------------------------------------------------------------------- /backend/rust_parser/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "rust_parser" 3 | version = "1.0.0" 4 | edition = "2021" 5 | 6 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 7 | 8 | [dependencies] 9 | anyhow = "1.0.75" 10 | pretty_assertions = "1.4.0" 11 | regex = "1.9.6" 12 | serde = { version = "1.0.188", features = ["derive"] } 13 | serde_json = "1.0.107" 14 | tl = "0.7.7" 15 | -------------------------------------------------------------------------------- /backend/vector_store/.sqlx/query-63012c5ed8ef1bd8c3c931294767c96a5eae22e960264ab1a6b4b9e957c3c20c.json: -------------------------------------------------------------------------------- 1 | { 2 | "db_name": "PostgreSQL", 3 | "query": "DELETE FROM 
course_coordinator WHERE course_id = $1", 4 | "describe": { 5 | "columns": [], 6 | "parameters": { 7 | "Left": [ 8 | "Text" 9 | ] 10 | }, 11 | "nullable": [] 12 | }, 13 | "hash": "63012c5ed8ef1bd8c3c931294767c96a5eae22e960264ab1a6b4b9e957c3c20c" 14 | } 15 | -------------------------------------------------------------------------------- /frontend/eslint.config.js: -------------------------------------------------------------------------------- 1 | import globals from "globals"; 2 | import pluginJs from "@eslint/js"; 3 | import tseslint from "typescript-eslint"; 4 | 5 | export default [ 6 | { files: ["**/*.{js,mjs,cjs,ts}"] }, 7 | { 8 | ignores: [".svelte-kit", "node_modules", "dist"], 9 | }, 10 | { languageOptions: { globals: { ...globals.browser, ...globals.node } } }, 11 | pluginJs.configs.recommended, 12 | ...tseslint.configs.recommended, 13 | ]; 14 | -------------------------------------------------------------------------------- /backend/db-manager/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM rust_parser AS PARSER 2 | 3 | FROM clojure:temurin-20-lein-alpine AS BUILD 4 | 5 | 6 | COPY . 
/code 7 | 8 | WORKDIR /code 9 | 10 | RUN lein uberjar 11 | 12 | FROM eclipse-temurin:20-alpine 13 | 14 | WORKDIR /app 15 | 16 | COPY --from=PARSER /app/target/x86_64-unknown-linux-musl/release/rust_parser /usr/local/bin/rust_parser 17 | 18 | COPY --from=BUILD /code/target/uberjar/*-standalone.jar ./app.jar 19 | 20 | CMD ["java", "-jar", "app.jar", "-f"] 21 | -------------------------------------------------------------------------------- /backend/vector_store/.sqlx/query-e1166c8c3bde787b1b07787d9a30ec58575597e8223d57d8d02e43944b7d5cae.json: -------------------------------------------------------------------------------- 1 | { 2 | "db_name": "PostgreSQL", 3 | "query": "INSERT INTO course_coordinator (course_id, email) VALUES ($1, $2)", 4 | "describe": { 5 | "columns": [], 6 | "parameters": { 7 | "Left": [ 8 | "Text", 9 | "Text" 10 | ] 11 | }, 12 | "nullable": [] 13 | }, 14 | "hash": "e1166c8c3bde787b1b07787d9a30ec58575597e8223d57d8d02e43944b7d5cae" 15 | } 16 | -------------------------------------------------------------------------------- /frontend/src/components/SideCard.svelte: -------------------------------------------------------------------------------- 1 | 9 | 10 | 11 |
12 |

{heading}

13 |
14 | {@render children?.()} 15 |
16 |
17 | -------------------------------------------------------------------------------- /backend/vector_store/.sqlx/query-65f1c255cf0c5df5476c730aea2a9448783ca002a83452a3f245a82b79371652.json: -------------------------------------------------------------------------------- 1 | { 2 | "db_name": "PostgreSQL", 3 | "query": "INSERT INTO coordinator (email, full_name) VALUES ($1, $2)\n ON CONFLICT(email) DO NOTHING", 4 | "describe": { 5 | "columns": [], 6 | "parameters": { 7 | "Left": [ 8 | "Text", 9 | "Text" 10 | ] 11 | }, 12 | "nullable": [] 13 | }, 14 | "hash": "65f1c255cf0c5df5476c730aea2a9448783ca002a83452a3f245a82b79371652" 15 | } 16 | -------------------------------------------------------------------------------- /backend/db-manager/src/db_manager/cache.clj: -------------------------------------------------------------------------------- 1 | (ns db-manager.cache) 2 | 3 | (defn cache 4 | "This simply caches the result of a function call. It is used for memoizing the 5 | results of a data-source (for instance, a database query)." 6 | [hashable data-source] 7 | (let [cache-atom (atom {}) 8 | hashed (hash hashable) 9 | result (get @cache-atom hashed)] 10 | (if result 11 | result 12 | (let [result (data-source hashable)] 13 | (swap! cache-atom assoc hashed result) 14 | result)))) 15 | -------------------------------------------------------------------------------- /backend/vector_store/Dockerfile: -------------------------------------------------------------------------------- 1 | # BUILDER 2 | FROM rust:bookworm as builder 3 | 4 | WORKDIR /app 5 | COPY . . 
6 | 7 | RUN apt-get update 8 | RUN apt-get install -y libssl-dev pkg-config openssl g++ 9 | 10 | RUN cargo build --release 11 | 12 | # FINAL IMAGE 13 | FROM debian:bookworm-slim 14 | WORKDIR /app 15 | COPY --from=builder /app/target/release/vector_store ./ 16 | 17 | RUN apt-get update 18 | RUN apt-get install -y libssl3 ca-certificates 19 | 20 | ENV SERVER_ADDRESS=0.0.0.0 21 | ENV SERVER_PORT=4000 22 | ENV DATA_DIR="../data/" 23 | CMD ["./vector_store"] 24 | -------------------------------------------------------------------------------- /backend/vector_store/.sqlx/query-7060a1b2757a7c942deadc8b86ccc6ea26bbfb5e24db6dfbdea16ebb88c79319.json: -------------------------------------------------------------------------------- 1 | { 2 | "db_name": "PostgreSQL", 3 | "query": "INSERT INTO course (id, title, content) VALUES ($1, $2, $3)\n ON CONFLICT(id) DO UPDATE SET title = $2, content = $3, last_modified = CURRENT_TIMESTAMP", 4 | "describe": { 5 | "columns": [], 6 | "parameters": { 7 | "Left": [ 8 | "Text", 9 | "Text", 10 | "Text" 11 | ] 12 | }, 13 | "nullable": [] 14 | }, 15 | "hash": "7060a1b2757a7c942deadc8b86ccc6ea26bbfb5e24db6dfbdea16ebb88c79319" 16 | } 17 | -------------------------------------------------------------------------------- /frontend/src/assets/CloseCross.svelte: -------------------------------------------------------------------------------- 1 | 10 | 11 | 18 | 22 | 23 | -------------------------------------------------------------------------------- /backend/db-manager/test/db_manager/core_test.clj: -------------------------------------------------------------------------------- 1 | (ns db-manager.core-test 2 | (:require [clojure.test :refer :all] 3 | [db-manager.core :refer :all])) 4 | 5 | (deftest merge-test 6 | (let [test-employees [{:email "foo@bar.dk" :name "Erik" :title "CEO"} 7 | {:email "foo@bar.dk" :name "Erik" :title "CTO"} 8 | {:email "josh@jniemela.dk" :name "Josh" :title "Developer"}]] 9 | (is (= (merge-employees test-employees) 10 | 
[{:email "foo@bar.dk" :name "Erik" :title "CEO, CTO"} 11 | {:email "josh@jniemela.dk" :name "Josh" :title "Developer"}])))) 12 | 13 | -------------------------------------------------------------------------------- /.github/workflows/deploy.yml: -------------------------------------------------------------------------------- 1 | name: Deploy to server 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | 8 | jobs: 9 | deploy: 10 | name: Deploy to server 11 | runs-on: ubuntu-latest 12 | 13 | steps: 14 | - name: Run SSH commands 15 | uses: appleboy/ssh-action@v1.0.3 16 | with: 17 | host: ${{ secrets.HOST }} 18 | username: ${{ secrets.USERNAME }} 19 | key: ${{ secrets.SSH_KEY }} 20 | port: 22 21 | script: | 22 | cd /dockers/KU-courses 23 | git fetch 24 | git reset --hard origin/main 25 | docker compose up -d --build 26 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/pre-commit/pre-commit-hooks 3 | rev: v2.3.0 4 | hooks: 5 | - id: check-yaml 6 | - id: end-of-file-fixer 7 | - id: trailing-whitespace 8 | - repo: https://github.com/psf/black 9 | rev: 22.10.0 10 | hooks: 11 | - id: black 12 | - repo: https://github.com/pre-commit/mirrors-prettier 13 | rev: v3.0.0-alpha.9-for-vscode 14 | hooks: 15 | - id: prettier 16 | args: [--config, frontend/prettierrc.json, --write] # edit files in-place 17 | additional_dependencies: 18 | - prettier 19 | - prettier-plugin-svelte 20 | - svelte 21 | -------------------------------------------------------------------------------- /frontend/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "extends": "./.svelte-kit/tsconfig.json", 3 | "compilerOptions": { 4 | "allowJs": true, 5 | "checkJs": true, 6 | "esModuleInterop": true, 7 | "forceConsistentCasingInFileNames": true, 8 | "resolveJsonModule": true, 9 | 
"skipLibCheck": true, 10 | "sourceMap": true, 11 | "strict": true 12 | } 13 | // Path aliases are handled by https://kit.svelte.dev/docs/configuration#alias 14 | // 15 | // If you want to overwrite includes/excludes, make sure to copy over the relevant includes/excludes 16 | // from the referenced tsconfig.json - TypeScript does not merge them in 17 | } 18 | -------------------------------------------------------------------------------- /frontend/src/assets/Gb.svelte: -------------------------------------------------------------------------------- 1 | 6 | 7 | 11 | 15 | 16 | 17 | 18 | -------------------------------------------------------------------------------- /backend/vector_store/.sqlx/query-1bac47c3b9f3a187ee440c83f2c22106f0bfdb8d7865eec2d49a0585da7b21ca.json: -------------------------------------------------------------------------------- 1 | { 2 | "db_name": "PostgreSQL", 3 | "query": "SELECT title, content FROM course WHERE id = $1", 4 | "describe": { 5 | "columns": [ 6 | { 7 | "ordinal": 0, 8 | "name": "title", 9 | "type_info": "Text" 10 | }, 11 | { 12 | "ordinal": 1, 13 | "name": "content", 14 | "type_info": "Text" 15 | } 16 | ], 17 | "parameters": { 18 | "Left": [ 19 | "Text" 20 | ] 21 | }, 22 | "nullable": [ 23 | false, 24 | false 25 | ] 26 | }, 27 | "hash": "1bac47c3b9f3a187ee440c83f2c22106f0bfdb8d7865eec2d49a0585da7b21ca" 28 | } 29 | -------------------------------------------------------------------------------- /.github/workflows/rust-ci.yml: -------------------------------------------------------------------------------- 1 | name: Rust CI 2 | 3 | on: 4 | push: 5 | paths: 6 | - "backend/rust_parser/**" 7 | - "backend/vector_store/**" 8 | - ".github/workflows/rust-ci.yml" 9 | pull_request: 10 | branches: 11 | - main 12 | 13 | env: 14 | CARGO_TERM_COLOR: always 15 | RUSTFLAGS: "-Dwarnings" 16 | 17 | jobs: 18 | prettier: 19 | name: Rust quality check 20 | runs-on: ubuntu-latest 21 | strategy: 22 | matrix: 23 | service: [rust_parser, vector_store] 24 
| steps: 25 | - uses: actions/checkout@v4 26 | - name: Run clippy on ${{ matrix.service }} 27 | working-directory: backend/${{ matrix.service }} 28 | run: cargo clippy --all-targets --all-features 29 | -------------------------------------------------------------------------------- /frontend/src/components/Changelog/store.ts: -------------------------------------------------------------------------------- 1 | import { writable } from "svelte/store"; 2 | import { browser } from "$app/environment"; 3 | 4 | function modalStoreFunctions() { 5 | const { subscribe, set } = writable(false); 6 | 7 | return { 8 | subscribe, 9 | open: () => { 10 | console.log(document.body.scrollTop); 11 | set(true); 12 | if (browser) { 13 | document.body.classList.add("modal-open"); 14 | } 15 | }, 16 | close: () => { 17 | set(false); 18 | if (browser) { 19 | document.body.classList.remove("modal-open"); 20 | } 21 | }, 22 | }; 23 | } 24 | 25 | export const modalStore = modalStoreFunctions(); 26 | -------------------------------------------------------------------------------- /backend/db-manager/README.md: -------------------------------------------------------------------------------- 1 | # db-manager 2 | 3 | This module is responsible for interacting with the backend and the PostgreSQL database, this includes creating the tables, destroying the tables, and querying for various things in the database. 4 | 5 | ## Installation 6 | 7 | Install leiningen and run `lein deps` to install dependencies 8 | 9 | ## Usage 10 | 11 | FIXME: explanation 12 | 13 | $ java -jar db-manager-0.1.0-standalone.jar [args] 14 | 15 | ## Options 16 | 17 | FIXME: listing of options this app accepts. 18 | 19 | ## Examples 20 | 21 | ... 22 | 23 | ### Bugs 24 | 25 | ... 
26 | 27 | ### Any Other Sections 28 | ### That You Think 29 | ### Might be Useful 30 | 31 | ## License 32 | 33 | Owned by Josh Niemelä, MIT is in effect (MIT license document will be added later) 34 | -------------------------------------------------------------------------------- /backend/vector_store/.sqlx/query-07ff355f2f2a14bf0a163f3cb6450f57baa285962f0e9fefe0c8d6a5084e7d0a.json: -------------------------------------------------------------------------------- 1 | { 2 | "db_name": "PostgreSQL", 3 | "query": "SELECT c.id\n FROM course c\n LEFT JOIN title_embedding te ON c.id = te.course_id\n LEFT JOIN content_embedding ce ON c.id = ce.course_id\n WHERE\n c.last_modified > COALESCE(te.last_modified, to_timestamp(0)) OR\n c.last_modified > COALESCE(ce.last_modified, to_timestamp(0))", 4 | "describe": { 5 | "columns": [ 6 | { 7 | "ordinal": 0, 8 | "name": "id", 9 | "type_info": "Text" 10 | } 11 | ], 12 | "parameters": { 13 | "Left": [] 14 | }, 15 | "nullable": [ 16 | true 17 | ] 18 | }, 19 | "hash": "07ff355f2f2a14bf0a163f3cb6450f57baa285962f0e9fefe0c8d6a5084e7d0a" 20 | } 21 | -------------------------------------------------------------------------------- /frontend/src/assets/MenuIcon.svelte: -------------------------------------------------------------------------------- 1 | 8 | 12 | 16 | 20 | 21 | -------------------------------------------------------------------------------- /backend/vector_store/.sqlx/query-b67fb1612df114fa58878cc079a5c912765be487fb0d98985d68e6f2dcf4f885.json: -------------------------------------------------------------------------------- 1 | { 2 | "db_name": "PostgreSQL", 3 | "query": "SELECT id, title, content FROM course WHERE id = ANY($1)", 4 | "describe": { 5 | "columns": [ 6 | { 7 | "ordinal": 0, 8 | "name": "id", 9 | "type_info": "Text" 10 | }, 11 | { 12 | "ordinal": 1, 13 | "name": "title", 14 | "type_info": "Text" 15 | }, 16 | { 17 | "ordinal": 2, 18 | "name": "content", 19 | "type_info": "Text" 20 | } 21 | ], 22 | "parameters": { 23 
| "Left": [ 24 | "TextArray" 25 | ] 26 | }, 27 | "nullable": [ 28 | false, 29 | false, 30 | false 31 | ] 32 | }, 33 | "hash": "b67fb1612df114fa58878cc079a5c912765be487fb0d98985d68e6f2dcf4f885" 34 | } 35 | -------------------------------------------------------------------------------- /backend/db-manager/CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Change Log 2 | All notable changes to this project will be documented in this file. This change log follows the conventions of [keepachangelog.com](http://keepachangelog.com/). 3 | 4 | ## [Unreleased] 5 | ### Changed 6 | - Add a new arity to `make-widget-async` to provide a different widget shape. 7 | 8 | ## [0.1.1] - 2023-05-23 9 | ### Changed 10 | - Documentation on how to make the widgets. 11 | 12 | ### Removed 13 | - `make-widget-sync` - we're all async, all the time. 14 | 15 | ### Fixed 16 | - Fixed widget maker to keep working when daylight savings switches over. 17 | 18 | ## 0.1.0 - 2023-05-23 19 | ### Added 20 | - Files from the new template. 21 | - Widget maker public API - `make-widget-sync`. 22 | 23 | [Unreleased]: https://sourcehost.site/your-name/db-manager/compare/0.1.1...HEAD 24 | [0.1.1]: https://sourcehost.site/your-name/db-manager/compare/0.1.0...0.1.1 25 | -------------------------------------------------------------------------------- /frontend/src/app.css: -------------------------------------------------------------------------------- 1 | @tailwind base; 2 | @tailwind components; 3 | @tailwind utilities; 4 | 5 | /* FIXME: why does this need to be in app.css? 
*/ 6 | body.modal-open { 7 | position: fixed; 8 | } 9 | 10 | html { 11 | overflow-y: scroll; 12 | } 13 | 14 | @layer utilities { 15 | input[type="checkbox"] { 16 | @apply appearance-none w-4 h-4 border-2 border-kuGray rounded-none bg-white checked:bg-kuRed; 17 | } 18 | } 19 | 20 | @layer components { 21 | input[type="checkbox"]::after { 22 | content: ""; 23 | position: absolute; 24 | display: none; 25 | top: 9px; 26 | left: 6px; 27 | width: 4px; 28 | height: 8px; 29 | border: solid white; 30 | border-width: 0 2px 2px 0; 31 | transform: rotate(45deg); 32 | } 33 | 34 | input[type="checkbox"]:checked::after { 35 | display: block; 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /.github/workflows/frontend-ci.yml: -------------------------------------------------------------------------------- 1 | name: Frontend CI 2 | 3 | on: 4 | push: 5 | paths: 6 | - "frontend/**" 7 | - ".github/workflows/frontend-ci.yml" 8 | pull_request: 9 | branches: 10 | - main 11 | 12 | jobs: 13 | prettier: 14 | name: Code quality check 15 | runs-on: ubuntu-latest 16 | strategy: 17 | matrix: 18 | node-version: [18, 20, 22] 19 | steps: 20 | - uses: actions/checkout@v4 21 | - uses: actions/setup-node@v4 22 | with: 23 | node-version: ${{ matrix.node-version }} 24 | cache: "npm" 25 | cache-dependency-path: ./frontend/package-lock.json 26 | 27 | - name: Clean install 28 | working-directory: ./frontend 29 | run: npm ci 30 | 31 | - name: Prettier & Eslint 32 | working-directory: ./frontend 33 | run: npm run lint 34 | 35 | - name: Svelte check 36 | working-directory: ./frontend 37 | run: npm run check 38 | -------------------------------------------------------------------------------- /backend/vector_store/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "vector_store" 3 | version = "1.0.0" 4 | edition = "2021" 5 | 6 | # See more keys and their definitions at 
https://doc.rust-lang.org/cargo/reference/manifest.html 7 | 8 | [dependencies] 9 | acap = "0.3.0" 10 | anyhow = "1.0.77" 11 | axum = "0.7.3" 12 | bincode = "1.3.3" 13 | criterion = "0.5.1" 14 | fastembed = "4.9.1" 15 | nanohtml2text = "0.1.4" 16 | ndarray = "0.15.6" 17 | ort = "2.0.0-alpha.4" 18 | rayon = "1.8.0" 19 | serde = "1.0.193" 20 | serde_json = "1.0.108" 21 | tokenizers = "0.15.0" 22 | tokio = {version = "1.32.0", features = ["full"]} 23 | sqlx = { version = "0.7", features = ["runtime-tokio", "postgres", "macros", "json", "time"] } 24 | openssl = { version = "0.10.64", features = ["vendored"] } 25 | pgvector = { version = "0.3", features = ["sqlx"] } 26 | lazy_static = "1.5.0" 27 | async-stream = "0.3.5" 28 | futures-util = "0.3.30" 29 | futures-core = "0.3.30" 30 | #lto = "fat" 31 | #codegen-units = 1 32 | #panic = "abort" 33 | #debug=true 34 | -------------------------------------------------------------------------------- /frontend/src/routes/+layout.svelte: -------------------------------------------------------------------------------- 1 | 21 | 22 | 23 | 29 | 30 | 31 |
32 |
33 | {@render children?.()} 34 |
35 |
36 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2023 Joshua Niemelä 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 4 | 5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 6 | 7 | THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 8 | -------------------------------------------------------------------------------- /frontend/README.md: -------------------------------------------------------------------------------- 1 | # create-svelte 2 | 3 | Everything you need to build a Svelte project, powered by [`create-svelte`](https://github.com/sveltejs/kit/tree/master/packages/create-svelte). 4 | 5 | ## Creating a project 6 | 7 | If you're seeing this, you've probably already done this step. Congrats! 
8 | 9 | ```bash 10 | # create a new project in the current directory 11 | npm create svelte@latest 12 | 13 | # create a new project in my-app 14 | npm create svelte@latest my-app 15 | ``` 16 | 17 | ## Developing 18 | 19 | Once you've created a project and installed dependencies with `npm install` (or `pnpm install` or `yarn`), start a development server: 20 | 21 | ```bash 22 | npm run dev 23 | 24 | # or start the server and open the app in a new browser tab 25 | npm run dev -- --open 26 | ``` 27 | 28 | ## Building 29 | 30 | To create a production version of your app: 31 | 32 | ```bash 33 | npm run build 34 | ``` 35 | 36 | You can preview the production build with `npm run preview`. 37 | 38 | > To deploy your app, you may need to install an [adapter](https://kit.svelte.dev/docs/adapters) for your target environment. 39 | -------------------------------------------------------------------------------- /backend/db-manager/project.clj: -------------------------------------------------------------------------------- 1 | (defproject db-manager "1.1.0" 2 | :description "" 3 | :url "https://github.com/joshniemela/disproject" 4 | :license {:name "" 5 | :url ""} 6 | :dependencies [[org.clojure/clojure "1.11.1"] 7 | [org.clojure/data.json "2.4.0"] 8 | [org.jsoup/jsoup "1.16.1"] 9 | [http-kit "2.3.0"] 10 | [ring "1.10.0"] 11 | [ring/ring-codec "1.2.0"] 12 | [metosin/reitit "0.6.0"] 13 | [metosin/muuntaja "0.6.8"] 14 | [metosin/reitit-swagger-ui "0.7.0-alpha4"] 15 | [org.clojure/tools.cli "1.0.214"] 16 | [ring-cors "0.1.13"] 17 | [io.staticweb/rate-limit "1.1.0"] 18 | [clj-http "3.12.3"] 19 | [datascript "1.5.3"] 20 | [technology.tabula/tabula "1.0.5"]] 21 | :main ^:skip-aot db-manager.core 22 | :resource-path "resources" 23 | :target-path "target/%s" 24 | :profiles {:uberjar {:aot :all 25 | :jvm-opts ["-Dclojure.compiler.direct-linking=true"]}}) 26 | -------------------------------------------------------------------------------- /flake.nix: 
-------------------------------------------------------------------------------- 1 | 2 | 3 | { 4 | description = "A basic Rust devshell for NixOS users developing Leptos"; 5 | 6 | inputs = { 7 | nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable"; 8 | flake-utils.url = "github:numtide/flake-utils"; 9 | rust-overlay.url = "github:oxalica/rust-overlay"; 10 | }; 11 | 12 | outputs = { 13 | self, 14 | nixpkgs, 15 | flake-utils, 16 | rust-overlay, 17 | ... 18 | }: 19 | flake-utils.lib.eachDefaultSystem ( 20 | system: let 21 | overlays = [(import rust-overlay) ]; 22 | pkgs = import nixpkgs { 23 | inherit system overlays; 24 | }; 25 | in 26 | with pkgs; { 27 | devShells.default = mkShell { 28 | buildInputs = [ 29 | docker 30 | docker-compose 31 | #git 32 | #nodejs 33 | #leiningen 34 | #pipenv 35 | #python311 36 | 37 | ## browsers for testing 38 | #firefox 39 | #chromium 40 | #pkg-config 41 | #rust-bin.stable.latest.default 42 | ]; 43 | 44 | DATABASE_URL="postgres://postgres:password123@localhost:5432/disku"; 45 | 46 | }; 47 | } 48 | ); 49 | } 50 | -------------------------------------------------------------------------------- /frontend/src/assets/Github.svelte: -------------------------------------------------------------------------------- 1 | 9 | 10 | 22 | -------------------------------------------------------------------------------- /backend/vector_store/README.md: -------------------------------------------------------------------------------- 1 | # Vector Search API 2 | 3 | This service is a simple API that allows you to search for the most relevant courses in 4 | the course catalogue using a query string. 5 | The sentence embedding model, `all-MiniLM-L12-v2` is used to embed the course descriptions, coordinator names, and course titles into vectors that can be very rapidly queried for similarity. 
6 | 7 | # Features 8 | * Fast search for relevant courses 9 | * Automatic asynchronous embedding of course and coordinator data 10 | * Automatic insertion of new courses into the database 11 | 12 | # Requirements 13 | * Rust 14 | * docker 15 | * docker-compose (optional) 16 | * A connection to a PostgreSQL database containing the course catalogue and with the [pgvector](https://github.com/pgvector/pgvector) extension installed. 17 | 18 | # Deployment / Installation 19 | * The service can be deployed with the docker-compose file in the root of the repository. 20 | * It can be built using `docker build .` and manually be run if desired. 21 | 22 | ## Usage 23 | * The API accepts a POST request with a JSON body containing a single key, 'query', with the search query as a string. 24 | 25 | Example: 26 | ```json 27 | { 28 | "query": "Machine Learning" 29 | } 30 | ``` 31 | 32 | The API will return a JSON response with the key 'results' containing a list of the 10 most relevant courses in the course catalogue. 
33 | 34 | 35 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | services: 2 | frontend: 3 | build: 4 | context: ./frontend 5 | 6 | restart: always 7 | ports: 8 | - "5000:5000" 9 | networks: 10 | - dis-network 11 | depends_on: 12 | - server 13 | 14 | server: 15 | build: ./backend/db-manager 16 | restart: always 17 | container_name: dbmanager 18 | ports: 19 | - "3000:3000" 20 | volumes: 21 | - ./data:/data 22 | 23 | networks: 24 | - dis-network 25 | depends_on: 26 | rust_builder: 27 | condition: service_completed_successfully 28 | 29 | 30 | vector_store: 31 | build: ./backend/vector_store 32 | container_name: vectorstore 33 | restart: on-failure 34 | ports: 35 | - "4000:4000" 36 | 37 | volumes: 38 | - ./data:/data 39 | 40 | environment: 41 | POSTGRES_URL: postgres://postgres:password123@postgres/disku 42 | 43 | networks: 44 | - dis-network 45 | 46 | rust_builder: 47 | build: ./backend/rust_parser 48 | image: rust_parser 49 | 50 | postgres: 51 | image: pgvector/pgvector:pg16 52 | ports: 53 | - "5432:5432" 54 | volumes: 55 | - ./data/postgres:/var/lib/postgresql/data 56 | environment: 57 | POSTGRES_USER: postgres 58 | POSTGRES_PASSWORD: password123 59 | POSTGRES_HOST: postgres 60 | POSTGRES_DB: disku 61 | 62 | networks: 63 | - dis-network 64 | 65 | networks: 66 | dis-network: 67 | -------------------------------------------------------------------------------- /backend/vector_store/migrations/0001_initial.sql: -------------------------------------------------------------------------------- 1 | -- Initial database migration for the vector search 2 | BEGIN; 3 | 4 | CREATE TABLE IF NOT EXISTS course ( 5 | id text PRIMARY KEY, 6 | title text NOT NULL, 7 | content text NOT NULL, 8 | last_modified timestamp DEFAULT CURRENT_TIMESTAMP NOT NULL 9 | ); 10 | 11 | CREATE TABLE IF NOT EXISTS coordinator ( 12 | email text PRIMARY KEY, 13 | 
full_name text NOT NULL 14 | ); 15 | 16 | CREATE TABLE IF NOT EXISTS course_coordinator ( 17 | course_id text, 18 | email text, 19 | PRIMARY KEY (course_id, email), 20 | FOREIGN KEY (course_id) REFERENCES course(id), 21 | FOREIGN KEY (email) REFERENCES coordinator(email) 22 | ); 23 | 24 | CREATE EXTENSION IF NOT EXISTS vector; 25 | 26 | CREATE TABLE IF NOT EXISTS name_embedding ( 27 | email text PRIMARY KEY, 28 | embedding vector(384) NOT NULL, 29 | FOREIGN KEY (email) REFERENCES coordinator(email) 30 | ); 31 | 32 | CREATE TABLE IF NOT EXISTS title_embedding ( 33 | course_id text PRIMARY KEY, 34 | embedding vector(384) NOT NULL, 35 | last_modified timestamp DEFAULT CURRENT_TIMESTAMP NOT NULL, 36 | FOREIGN KEY (course_id) REFERENCES course(id) 37 | ); 38 | 39 | CREATE TABLE IF NOT EXISTS content_embedding ( 40 | course_id text PRIMARY KEY, 41 | embedding vector(384) NOT NULL, 42 | last_modified timestamp DEFAULT CURRENT_TIMESTAMP NOT NULL, 43 | FOREIGN KEY (course_id) REFERENCES course(id) 44 | ); 45 | 46 | COMMIT; 47 | -------------------------------------------------------------------------------- /frontend/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "frontend", 3 | "version": "1.0.0", 4 | "private": true, 5 | "scripts": { 6 | "dev": "vite dev", 7 | "build": "vite build", 8 | "preview": "vite preview", 9 | "check": "svelte-kit sync && svelte-check --tsconfig ./tsconfig.json", 10 | "check:watch": "svelte-kit sync && svelte-check --tsconfig ./tsconfig.json --watch", 11 | "lint": "prettier --check . && eslint ." 
12 | }, 13 | "devDependencies": { 14 | "@eslint/js": "^9.9.0", 15 | "@sveltejs/adapter-node": "^2.0.0", 16 | "@sveltejs/kit": "^2.5.27", 17 | "@sveltejs/vite-plugin-svelte": "^4.0.0", 18 | "@types/node": "^20.4.9", 19 | "autoprefixer": "^10.4.14", 20 | "eslint": "^9.9.0", 21 | "globals": "^15.9.0", 22 | "postcss": "^8.4.31", 23 | "prettier": "^3.3.3", 24 | "svelte": "^5.0.0", 25 | "svelte-check": "^4.0.0", 26 | "tailwindcss": "^3.3.2", 27 | "tslib": "^2.4.1", 28 | "typescript": "^5.5.0", 29 | "typescript-eslint": "^8.1.0", 30 | "vite": "^5.4.4" 31 | }, 32 | "type": "module", 33 | "dependencies": { 34 | "@popperjs/core": "^2.11.8", 35 | "chart.js": "^4.3.3", 36 | "chartjs-plugin-datalabels": "^2.2.0", 37 | "lorem-ipsum": "^2.0.8", 38 | "postgres": "^3.3.4", 39 | "prettier-plugin-svelte": "^3.2.6", 40 | "tailwind-merge": "^1.13.2" 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /frontend/src/components/CheckboxMenu.svelte: -------------------------------------------------------------------------------- 1 | 13 | 14 |
15 |
16 |

{header_name}

17 |
18 | 19 |
20 | 26 | {#each options as option} 27 | 28 | 42 | {/each} 43 |
44 |
45 | -------------------------------------------------------------------------------- /backend/rust_parser/src/parser/workload_information.rs: -------------------------------------------------------------------------------- 1 | use crate::parser::{Workload, WorkloadType}; 2 | use anyhow::{ensure, Context, Result}; 3 | 4 | use tl::VDom; 5 | 6 | pub fn parse_workloads(dom: &VDom) -> Result> { 7 | let parser = dom.parser(); 8 | let workload_table = dom 9 | .get_element_by_id("course-load") 10 | .context("Unable to find workload table")? 11 | .get(parser) 12 | .unwrap() 13 | .as_tag() 14 | .unwrap(); 15 | 16 | let mut lis = workload_table 17 | .query_selector(parser, "li") 18 | .context("Unable to find any workload information")? 19 | .map(|node| node.get(parser).unwrap().inner_text(parser)) 20 | .skip(2) 21 | .peekable(); 22 | 23 | let mut workloads = Vec::new(); 24 | // take two at a time from lis 25 | // first is the workload type 26 | // second is the workload value 27 | let mut pair: Vec = Vec::new(); 28 | while lis.peek().is_some() { 29 | if pair.len() == 2 { 30 | let workload = Workload { 31 | workload_type: WorkloadType::from_str(&pair[0])?, 32 | hours: pair[1] 33 | .replace(',', ".") 34 | .parse::() 35 | .context(format!("Unable to parse workload hours: {}", pair[1]))?, 36 | }; 37 | workloads.push(workload); 38 | pair.clear(); 39 | } 40 | pair.push(lis.next().unwrap().to_string()); 41 | } 42 | 43 | ensure!( 44 | !workloads.is_empty(), 45 | "Unable to parse workload information" 46 | ); 47 | Ok(workloads) 48 | } 49 | -------------------------------------------------------------------------------- /backend/rust_parser/src/parser/content_serialiser.rs: -------------------------------------------------------------------------------- 1 | use crate::parser::Description; 2 | 3 | use tl::VDom; 4 | 5 | use anyhow::{Context, Result}; 6 | 7 | // grab some specific htmls and return the html 8 | pub fn grab_htmls(dom: &VDom) -> Result { 9 | let parser = dom.parser(); 10 | 
let content_html = dom 11 | .get_element_by_id("course-content") 12 | .context("Unable to find course content")? 13 | .get(parser) 14 | .context("Unable to grab parser for the dom, this should not happen")?; 15 | 16 | let learning_outcome_html = dom 17 | .get_element_by_id("course-description") 18 | .context("Unable to find learning outcomes")? 19 | .get(parser) 20 | .context("Unable to grab parser for the dom, this should not happen")? 21 | .inner_html(parser); 22 | 23 | // Handle that recommended qualifications might be none 24 | let recommended_qualifications_html = dom 25 | .get_element_by_id("course-skills") // this might be none 26 | .and_then(|elem| { 27 | Some( 28 | elem.get(parser) 29 | .context("Unable to grab parser for the dom, this should not happen") 30 | .ok()? 31 | .inner_html(parser), 32 | ) 33 | }); 34 | 35 | let recommended_qualifications_html = 36 | recommended_qualifications_html.filter(|s| !(s.contains("Ingen") || s.contains("None"))); 37 | 38 | // grab the first 300 chars of the content 39 | let summary = content_html 40 | .inner_text(parser) 41 | .chars() 42 | .take(300) 43 | .collect::(); 44 | 45 | Ok(Description { 46 | content: content_html.inner_html(parser).to_string(), 47 | learning_outcome: learning_outcome_html.to_string(), 48 | recommended_qualifications: recommended_qualifications_html.map(|s| s.to_string()), 49 | summary, 50 | }) 51 | } 52 | -------------------------------------------------------------------------------- /frontend/src/routes/sitemap.xml/+server.ts: -------------------------------------------------------------------------------- 1 | import { dev } from "$app/environment"; 2 | 3 | const url = dev ? 
"http://localhost:3000" : "https://kucourses.dk"; 4 | 5 | function today_yyyy_mm_dd(): string { 6 | const d = new Date(); 7 | const iso = d.toISOString(); 8 | return iso.substring(0, 10); 9 | } 10 | 11 | function generate_xml(course_id: string): string { 12 | return ` 13 | 14 | ${url}/course/${course_id} 15 | 0.8 16 | ${today_yyyy_mm_dd()} 17 | 18 | `; 19 | } 20 | 21 | export async function GET() { 22 | // grab all course-ids from the get-course-ids endpoint which gives a list of json objects 23 | // [{course_id: "course1"}, {course_id: "course2"}] 24 | // 25 | const res = await fetch(`${url}/api/get-all-course-ids`); 26 | const json = await res.json(); 27 | const today = today_yyyy_mm_dd(); 28 | 29 | const course_ids = json.map((x: { course_id: string }) => x.course_id); 30 | 31 | return new Response( 32 | ` 33 | 34 | 42 | 43 | 44 | 45 | ${url} 46 | ${today} 47 | 1.0 48 | 49 | 50 | ${course_ids.map(generate_xml).join("\n")} 51 | 52 | `.trim(), 53 | { 54 | headers: { 55 | "Content-Type": "application/xml", 56 | }, 57 | } 58 | ); 59 | } 60 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing 2 | 3 | All contributions, be it big or small, are encouraged and any help with the project is greatly appreciated. 4 | 5 | ## I want to contribute 6 | ### Boring legal nonsense 7 | By creating a pull request to the project you fully agree that you have the rights to distribute the code in question and that the code, if licensed, can exist under the MIT license. 8 | ### Feature requests 9 | If you have a feature request, please check if it isn't already present in the [backlog](https://github.com/users/joshniemela/projects/5). If it isn't, then either open up an issue or send the request to [Josh Niemelä](mailto:josh@jniemela.dk). 
10 | ### Pull requests 11 | Anything goes, but it is expected that a pull request should solve some particular issue in the active issues, or something in the backlog (make a feature request if your contribution doesn't fit the aforementioned). This pull request is expected to contain code that has been run through its respective formatter (Black for Python, Cljfmt for Clojure, (To come) for TS/Svelte). 12 | 13 | ### Running the project 14 | The project contains a .env file which can be set to development or production, development is the one that should be used. The individual components of the project can be run collectively using `docker-compose` or individually (`lein run` for Clojure, `pipenv run` for Python, `npm run dev` for TS/Svelte). 15 | 16 | ## Bugs 17 | If you've found a bug or something that isn't intuitive in the user interface: 18 | * Ensure this isn't already a known bug by looking at the [issues](https://github.com/joshniemela/disproject/issues). 19 | * Try to replicate the unexpected behaviour. 20 | * Please include the OS, Browser and other useful information in the bug report to make it easier to narrow it down. 21 | * Write an issue about the problem, eventually possible solutions to the problem. 22 | * Lastly, feel welcome to assign yourself to fixing the problem or tagging someone who might be able to fix it. 23 | 24 | -------------------------------------------------------------------------------- /frontend/src/components/BigCheckbox.svelte: -------------------------------------------------------------------------------- 1 | 19 | 20 |
21 | 31 | 32 |
33 | 39 | {#each options as option} 40 | 41 | 55 | {/each} 56 |
57 |
58 | -------------------------------------------------------------------------------- /frontend/src/components/Footer/Footer.svelte: -------------------------------------------------------------------------------- 1 | 14 | 15 | 57 | 58 | 69 | -------------------------------------------------------------------------------- /frontend/src/theme.ts: -------------------------------------------------------------------------------- 1 | const theme = { 2 | colors: { 3 | kuRed: "#901a1e", 4 | kuGray: "#14314f", 5 | darkGray: "#2b2d41", 6 | dulledWhite: "#f4f5f7", 7 | greyedOut: "#8d99ad", 8 | brand: { 9 | 100: "#270102", 10 | 200: "#370002", 11 | 300: "#630307", 12 | 400: "#780D10", 13 | 500: "#901A1E", 14 | 600: "#B84044", 15 | 700: "#D27275", 16 | 800: "#E5A3A5", 17 | 900: "#FCEBEC", 18 | }, 19 | neutral: { 20 | 100: "#03080E", 21 | 200: "#101E2D", 22 | 300: "#1A2A39", 23 | 400: "#273441", 24 | 500: "#3A4550", 25 | 600: "#7B7E81", 26 | 700: "#C2C2C2", 27 | 800: "#F2EFEF", 28 | 900: "#FFFFFF", 29 | }, 30 | green: { 31 | 100: "#013100", 32 | 200: "#026200", 33 | 300: "#049001", 34 | 400: "#0FBC0C", 35 | 500: "#24D921", 36 | 600: "#3EEE3B", 37 | 700: "#71FF6F", 38 | 800: "#88FF86", 39 | 900: "#AEFFAC", 40 | }, 41 | orange: { 42 | 100: "#302300", 43 | 200: "#624600", 44 | 300: "#906801", 45 | 400: "#BC8A0C", 46 | 500: "#D9A521", 47 | 600: "#EEBB3B", 48 | 700: "#FFD66F", 49 | 800: "#FFDD86", 50 | 900: "#FFE8AC", 51 | }, 52 | red: { 53 | 100: "#300000", 54 | 200: "#620000", 55 | 300: "#900101", 56 | 400: "#BC0C0C", 57 | 500: "#D92121", 58 | 600: "#EE3B3B", 59 | 700: "#FF6F6F", 60 | 800: "#FF8686", 61 | 900: "#FFACAC", 62 | }, 63 | blue: { 64 | 100: "#001330", 65 | 200: "#002762", 66 | 300: "#013B90", 67 | 400: "#0C52BC", 68 | 500: "#216AD9", 69 | 600: "#3B82EE", 70 | 700: "#6FA8FF", 71 | 800: "#86B6FF", 72 | 900: "#ACCDFF", 73 | }, 74 | }, 75 | }; 76 | 77 | export default theme; 78 | -------------------------------------------------------------------------------- 
/frontend/src/assets/Facebook.svelte: -------------------------------------------------------------------------------- 1 | 9 | 10 | 18 | 19 | 20 | 21 | 22 | 23 | 27 | 28 | 29 | 30 | 35 | 36 | 37 | 42 | 43 | 44 | 45 | 46 | 47 | -------------------------------------------------------------------------------- /frontend/src/routes/course/[courseId]/+page.server.ts: -------------------------------------------------------------------------------- 1 | import { apiUrl } from "../../../stores"; 2 | import { total_hours } from "../../../course"; 3 | import type { Course, Statistics, Grade } from "../../../course"; 4 | 5 | // ungraded returns absent, total, fail and pass 6 | function transform_ungraded_stats(stats: Statistics) { 7 | return [ 8 | { grade: "Fail", count: stats.fail }, 9 | { grade: "Pass", count: stats.pass }, 10 | { grade: "Absent", count: stats.absent }, 11 | ]; 12 | } 13 | 14 | function transform_graded_stats(stats: Statistics) { 15 | return stats.grades.map((grade) => { 16 | return { grade: grade.grade, count: grade.count }; 17 | }); 18 | } 19 | 20 | function transform_stats(stats: Statistics | null) { 21 | if (stats == null) { 22 | return null; 23 | } else { 24 | if (stats.grades == null) { 25 | return transform_ungraded_stats(stats); 26 | } else { 27 | return transform_graded_stats(stats); 28 | } 29 | } 30 | } 31 | 32 | function null_to_zero(grades: Grade[] | null) { 33 | // in each grade, count pair, if count is null, set it to 0 34 | if (grades == null) { 35 | return undefined; 36 | } 37 | return grades.map((grade: Grade) => { 38 | if (grade.count == null) { 39 | return { grade: grade.grade, count: 0 }; 40 | } else { 41 | return grade; 42 | } 43 | }); 44 | } 45 | 46 | export async function load({ fetch, params }) { 47 | const { courseId } = params; 48 | const API_URL = apiUrl(); 49 | 50 | const res = await fetch( 51 | `${API_URL}/get-detailed-course-info?id=${courseId}`, 52 | { 53 | method: "GET", 54 | headers: { 55 | accept: "application/json", 56 | 
"Content-Type": "application/json", 57 | }, 58 | } 59 | ); 60 | 61 | const course: Course = await res.json(); 62 | const grades = null_to_zero(transform_stats(course.statistics)); 63 | const stats = course.statistics; 64 | if (stats !== null && stats !== undefined) { 65 | stats.grades = grades !== undefined ? grades : []; 66 | } 67 | return { 68 | courseId: courseId, 69 | course: course, 70 | totalHours: total_hours(course), 71 | statistics: stats, 72 | loading: false, 73 | }; 74 | } 75 | -------------------------------------------------------------------------------- /frontend/src/components/Loader/Loader.svelte: -------------------------------------------------------------------------------- 1 | 4 | 5 | 6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 | 20 | 96 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # KU-Courses 2 | ⚠️ Future developement will continue on [Forgejo](https://git.argmin.dk/joshnie/KU-courses) 3 | 4 | ![Example of KU-Courses](frontend/static/assets/preview.png "KU Courses") 5 | 6 | The entire application is governed through the `docker-compose.yml` file and is built with `docker compose`: 7 | 8 | ## Starting the application 9 | 1. Install `docker` and `docker-compose`, this may need a restart of your system since Docker is a very low level program. 10 | 2. Run `docker compose up --build` as either a user with permissions to docker, or with `sudo`/`doas`, the build flag is required if the backend or frontend code has been changed, additionally `-d` will make it detach from the terminal. 11 | a. Add folders `exam_pdfs` and `pages` to the `data` folder. 12 | 4. Wait for the scraper in the backend to complete scraping pages, this may take about 15 minutes. 13 | 5. Run `docker compose restart`, this is required so that the parser will run and so that the vector store can create new embeddings. 14 | 6. ??? 15 | 7. PROFIT!!! 16 | 17 | 18 | ## db-manager 19 | The backend is built with Clojure, a functional programmering language based on Lisp which runs on the Java Virtual Machine. 20 | This part serves multiple purposes, it is responsible for scraping the course pages from KU as well as the statistics from STADS. 21 | The backend also serves the frontend and contains the "datascript" database and is responsible for refreshing and various services occasionally (this feature is partially broken at the moment). 22 | 23 | ## vector_store 24 | This service is responsible for the semantic searches used in the `get_course_overviews` route, instead of using trigrams or full-text, we decided to use vector searches for the lower latency. 
25 | 26 | ## rust_parser 27 | This service is the parser that takes the scraped course pages and parses them into a format we can use in the database for searching and for serving to the frontend. 28 | 29 | ## frontend 30 | Frontend is built in Svelte/Typescript. This is a highly responsible SPA that shows the courses in the form of cards which can be clicked into to get a more detailed view of the course. 31 | 32 | 33 | # Credits 34 | * Thanks to [Jákup Lützen](https://github.com/BinFlush) for creating the original course parser in Python. 35 | * Thanks to [Kristian Pedersen](https://github.com/KristianDamPedersen) for creating the original frontend, and help in designing the architecture and first database schema. 36 | * Thanks to [Zander Bournonville](https://github.com/DrZder) for creating the statistics parser. 37 | -------------------------------------------------------------------------------- /backend/vector_store/src/populate.rs: -------------------------------------------------------------------------------- 1 | use super::{Coordinator, PostgresDB}; 2 | use anyhow::Result; 3 | use nanohtml2text::html2text; 4 | use serde::Deserialize; 5 | use std::fs::File; 6 | use std::io::BufReader; 7 | use std::path::Path; 8 | 9 | #[derive(Deserialize, Clone)] 10 | pub struct Document { 11 | pub title: String, 12 | pub info: Info, 13 | pub description: Description, 14 | pub logistics: Logistics, 15 | } 16 | 17 | #[derive(Deserialize, Clone)] 18 | pub struct Logistics { 19 | pub coordinators: Vec, 20 | } 21 | 22 | #[derive(Deserialize, Clone)] 23 | pub struct Description { 24 | pub content: String, 25 | } 26 | 27 | #[derive(Deserialize, Clone)] 28 | pub struct Info { 29 | pub id: String, 30 | } 31 | 32 | /// Upserts all the documents in the directory into the database 33 | /// This function is used to populate the database 34 | /// TODO: Remove population functionality from this service 35 | pub async fn upsert_documents_from_path(db: &PostgresDB, path: &Path) -> 
Result<()> { 36 | let documents = read_jsons(path)?; 37 | for document in documents { 38 | db.upsert_document(&document).await?; 39 | } 40 | Ok(()) 41 | } 42 | 43 | /// Reads a json file from the path and returns a Document 44 | /// This function also converts the html content to plain text and removes newlines 45 | fn read_json(path: &Path) -> Result { 46 | // TODO: this entire thing is awful, please rewrite 47 | let file = File::open(path)?; 48 | let reader = BufReader::new(file); 49 | let mut doc: Document = serde_json::from_reader(reader)?; 50 | doc.description.content = html2text(&doc.description.content); 51 | doc.description.content = doc.description.content.replace('\n', " "); 52 | doc.description.content = doc.description.content.replace('\t', " "); 53 | doc.description.content = doc.description.content.replace('\r', " "); 54 | Ok(doc) 55 | } 56 | 57 | /// Reads all the jsons in the directory and returns a Vec 58 | /// This function also converts the html content to plain text and removes newlines 59 | /// This function is used to populate the database 60 | /// TODO: Remove population functionality from this service 61 | fn read_jsons(path: &Path) -> Result> { 62 | // this should read all the jsons in the directory 63 | let file_names = std::fs::read_dir(path)?; 64 | let mut documents = Vec::new(); 65 | for file_name in file_names { 66 | let file_name = file_name?; 67 | let path = file_name.path(); 68 | let document = read_json(&path)?; 69 | documents.push(document); 70 | } 71 | Ok(documents) 72 | } 73 | -------------------------------------------------------------------------------- /backend/db-manager/src/exam_scraper/core.clj: -------------------------------------------------------------------------------- 1 | (ns exam-scraper.core 2 | (:require [clojure.java.io :as io] 3 | [clojure.string :as string] 4 | [clojure.set :as set]) 5 | (:import (java.io File) 6 | (org.apache.commons.cli DefaultParser) 7 | (technology.tabula CommandLineApp))) 8 | 9 | (defn 
to-command-line [options] 10 | (let [parser (DefaultParser.) 11 | build-options (CommandLineApp/buildOptions) 12 | args (into-array String options)] 13 | (.parse parser build-options args))) 14 | 15 | (def tabula-options ["-f" "TSV" "-g" "-p" "all"]) 16 | 17 | (defn convert-exam-pdf-to-tsv [pdf-file out-file] 18 | (let [cmd-line (to-command-line tabula-options) 19 | cli-app (CommandLineApp. System/out cmd-line)] 20 | (.extractFileInto cli-app pdf-file out-file))) 21 | 22 | (defn get-itx-courses-from-file [pdf-file] 23 | ; the course code is on the first column, if the second column contains "ITX" anywhere in the row it's an ITX course 24 | ; start by converting to tsv at a temporary location 25 | (let [tsv-file (File/createTempFile "tabula" ".tsv")] 26 | (try 27 | (convert-exam-pdf-to-tsv pdf-file tsv-file) 28 | (let [tsv (slurp tsv-file) 29 | lines (string/split-lines tsv) 30 | itx-courses (filter #(string/includes? % "ITX") lines)] 31 | (println "[exam scraper] Found" (count itx-courses) "ITX courses in" pdf-file) 32 | (map #(first (string/split % #"\t")) itx-courses)) 33 | (catch Exception e 34 | (println "[exam scraper] Failed to extract ITX courses from" pdf-file ":" (.getMessage e)) 35 | (.printStackTrace e) 36 | ; we don't handle this case yet, 37 | ; we just return an empty list and move on 38 | [])))) 39 | 40 | (defn get-itx-courses-from-dir [dir] 41 | (let [pdf-files (drop 1 (file-seq (io/file dir))) 42 | itx-courses (mapcat get-itx-courses-from-file pdf-files)] 43 | (distinct itx-courses))) 44 | 45 | 46 | (defn to-itx [exams-list] 47 | ; exams-list is a vector of maps, each key has a key, if this key is "Written", change it to "ITX") 48 | (map (fn [exam] 49 | (if (map? 
exam) 50 | (set/rename-keys exam {"Written" "ITX"}) 51 | exam)) exams-list)) 52 | 53 | ; I mistankenly thought they were a vector of maps, but they are a vector of maps OR strings 54 | 55 | 56 | ; make a functio nthat only does this for a single course 57 | (defn patch-course-exam [course itx-course-ids] 58 | (let [course-id (get-in course ["info" "id"]) 59 | itx? (some #(= course-id %) itx-course-ids)] 60 | (if itx? 61 | (assoc course "exams" (to-itx (get course "exams"))) 62 | course))) 63 | 64 | (defn patch-courses-w-itx [courses itx-course-ids] 65 | (map #(patch-course-exam % itx-course-ids) courses)) 66 | -------------------------------------------------------------------------------- /backend/db-manager/src/course_scraper/upsert.clj: -------------------------------------------------------------------------------- 1 | 2 | (ns course-scraper.upsert 3 | (:require [clojure.core :as c] 4 | [clojure.data.json :as json] 5 | [clojure.java.io :as io] 6 | [reitit.coercion.spec] 7 | [db-manager.db :refer [course-to-transaction remove-nils]] 8 | [datascript.core :as d]) 9 | (:gen-class)) 10 | 11 | ; https://andersmurphy.com/2022/03/27/clojure-removing-namespace-from-keywords-in-response-middleware.html 12 | (defn transform-keys 13 | [t coll] 14 | (clojure.walk/postwalk (fn [x] (if (map? x) (update-keys x t) x)) coll)) 15 | 16 | (defn remove-namespace-keywords-in-response-middleware [handler & _] 17 | (fn [req] 18 | (let [resp (handler req)] 19 | (cond-> resp 20 | (comp map? :body) (update :body 21 | (partial transform-keys 22 | (comp keyword name))))))) 23 | 24 | (defn try-finding-stats [stats-dir course-id] 25 | (try 26 | ; stats file is in stats-dir 27 | (let [stats-file (str stats-dir course-id ".json")] 28 | (json/read-str (slurp stats-file))) 29 | (catch Exception e 30 | nil))) 31 | 32 | (defn transform-stats [stats] 33 | (when-not (nil? 
(stats "exam")) 34 | (let [exam (stats "exam") 35 | pass-rate (exam "pass-rate") 36 | mean (exam "mean") 37 | median (exam "median") 38 | graded? (exam "graded") 39 | grades (exam "grades") 40 | absent (exam "absent") 41 | fail (exam "fail") 42 | pass (exam "pass") 43 | total (exam "total")] 44 | (if graded? 45 | {:statistics/pass-rate pass-rate 46 | :statistics/absent absent 47 | :statistics/fail fail 48 | :statistics/pass pass 49 | :statistics/total total 50 | :statistics/mean mean 51 | :statistics/median median 52 | :statistics/grades grades} 53 | {:statistics/pass-rate pass-rate 54 | :statistics/pass pass 55 | :statistics/absent absent 56 | :statistics/fail fail 57 | :statistics/total total})))) 58 | 59 | 60 | (defn transactions-w-stats [stats-finder courses] (map (fn [course] 61 | (let [course-id (get-in course ["info" "id"]) 62 | stats (stats-finder course-id) 63 | transacted-course (course-to-transaction course)] 64 | (remove-nils (if stats 65 | (assoc transacted-course :course/statistics (transform-stats stats)) 66 | transacted-course)))) 67 | courses)) 68 | 69 | (defn read-json-file [file-name] 70 | (let [file (slurp file-name)] 71 | (json/read-str file))) 72 | -------------------------------------------------------------------------------- /frontend/src/stores.ts: -------------------------------------------------------------------------------- 1 | import { writable } from "svelte/store"; 2 | import type { Writable } from "svelte/store"; 3 | import { browser } from "$app/environment"; 4 | 5 | const VERSION = "v1.0.0"; 6 | const VERSION_KEY = "version"; 7 | 8 | // Generic store functions 9 | function setSessionStore(key: string, value: T): void { 10 | sessionStorage.setItem(key, JSON.stringify(value)); 11 | } 12 | 13 | function getSessionStore(key: string): T | null { 14 | return JSON.parse(sessionStorage.getItem(key) || "null") as T; 15 | } 16 | 17 | // A generic writable store that persists to sessionStorage 18 | export function writableSession(key: 
string, value: T): Writable { 19 | if (!browser) return writable(value); // Mock for SSR 20 | const storedVersion = sessionStorage.getItem(VERSION_KEY); 21 | 22 | // Cache busting 23 | if (storedVersion !== VERSION) { 24 | sessionStorage.removeItem(key); 25 | sessionStorage.setItem(VERSION_KEY, VERSION); 26 | } 27 | 28 | const sessionValue = getSessionStore(key); 29 | if (!sessionValue) setSessionStore(key, value); 30 | 31 | const store = writable(sessionValue || value); 32 | store.subscribe((value) => { 33 | setSessionStore(key, value); 34 | }); 35 | 36 | return store; 37 | } 38 | // END Generic store functions 39 | 40 | // make a writableSession if we have a browser 41 | const emptyQuery = { 42 | blocks: [], 43 | degrees: [], 44 | schedules: [], 45 | exams: [], 46 | departments: [], 47 | languages: [], 48 | search: "", 49 | }; 50 | 51 | export const queryStore = writableSession("filters", emptyQuery); 52 | 53 | export function clearAll() { 54 | // Cause the checkboxes to update 55 | queryStore.update((store) => { 56 | store.blocks = []; 57 | store.degrees = []; 58 | store.schedules = []; 59 | store.exams = []; 60 | store.departments = []; 61 | store.languages = []; 62 | store.search = ""; 63 | return store; 64 | }); 65 | } 66 | 67 | // API URL: prod fallback during SSR, local dev server on localhost, same-origin otherwise. 68 | export function apiUrl() { 69 | // check that window is defined, this is used for checking if we are running in the browser 70 | if (typeof window === "undefined") { 71 | return "https://kucourses.dk/api"; // SSR 72 | } 73 | 74 | const hostname = window.location.hostname; 75 | if (hostname == "localhost") { 76 | return "http://localhost:3000/api"; 77 | } 78 | 79 | // if running on another host, assume we are in prod 80 | return "https://" + hostname + "/api"; 81 | } 82 | 83 | function xorString(str: string, key: number): string { 84 | return str 85 | .split("") 86 | .map((char) => String.fromCharCode(char.charCodeAt(0) ^ key)) 87 | .join(""); 88 | } 89 | // mail
obfuscator/deobfuscator using XOR, this should return a function with no arguments that returns a string 91 | export function obfuscateEmail(email: string): () => string { 92 | // generate the key by summing the char codes of the email and mod 256 93 | const key = 94 | email.split("").reduce((acc, char) => acc + char.charCodeAt(0), 0) % 95 | 256; 96 | const obfuscated = xorString(email, key); 97 | return () => xorString(obfuscated, key); 98 | } 99 | -------------------------------------------------------------------------------- /frontend/tailwind.config.js: -------------------------------------------------------------------------------- 1 | /** @type {import('tailwindcss').Config} */ 2 | export default { 3 | content: ["./src/**/*.{html,js,svelte,ts}"], 4 | theme: { 5 | extend: { 6 | spacing: { 7 | text: "clamp(45ch,50%,75ch)", 8 | }, 9 | listStyleType: { square: "square" }, 10 | colors: { 11 | kuRed: "#901A1E", 12 | kuGray: "#333333", 13 | darkGray: "#2b2d41", 14 | dulledWhite: "#F4F5F7", 15 | greyedOut: "#8D99AD", 16 | brand: { 17 | 100: "#270102", 18 | 200: "#370002", 19 | 300: "#630307", 20 | 400: "#780D10", 21 | 500: "#901A1E", 22 | 600: "#B84044", 23 | 700: "#D27275", 24 | 800: "#E5A3A5", 25 | 900: "#FCEBEC", 26 | }, 27 | neutral: { 28 | 100: "#03080E", 29 | 200: "#101E2D", 30 | 300: "#1A2A39", 31 | 400: "#273441", 32 | 500: "#3A4550", 33 | 600: "#7B7E81", 34 | 700: "#C2C2C2", 35 | 800: "#F2EFEF", 36 | 900: "#FFFFFF", 37 | }, 38 | green: { 39 | 100: "#013100", 40 | 200: "#026200", 41 | 300: "#049001", 42 | 400: "#0FBC0C", 43 | 500: "#24D921", 44 | 600: "#3EEE3B", 45 | 700: "#71FF6F", 46 | 800: "#88FF86", 47 | 900: "#AEFFAC", 48 | }, 49 | orange: { 50 | 100: "#302300", 51 | 200: "#624600", 52 | 300: "#906801", 53 | 400: "#BC8A0C", 54 | 500: "#D9A521", 55 | 600: "#EEBB3B", 56 | 700: "#FFD66F", 57 | 800: "#FFDD86", 58 | 900: "#FFE8AC", 59 | }, 60 | red: { 61 | 100: "#300000", 62 | 200: "#620000", 63 | 300: "#900101", 64 | 400: "#BC0C0C", 65 | 500: "#D92121", 66 
| 600: "#EE3B3B", 67 | 700: "#FF6F6F", 68 | 800: "#FF8686", 69 | 900: "#FFACAC", 70 | }, 71 | blue: { 72 | 100: "#001330", 73 | 200: "#002762", 74 | 300: "#013B90", 75 | 400: "#0C52BC", 76 | 500: "#216AD9", 77 | 600: "#3B82EE", 78 | 700: "#6FA8FF", 79 | 800: "#86B6FF", 80 | 900: "#ACCDFF", 81 | }, 82 | }, 83 | }, 84 | keyframes: { 85 | fadeIn: { 86 | "0%": { opacity: "0" }, 87 | "100%": { opacity: "1" }, 88 | }, 89 | }, 90 | animation: { 91 | fadeIn: "fadeIn 0.5s ease-in-out", 92 | }, 93 | }, 94 | }; 95 | -------------------------------------------------------------------------------- /backend/db-manager/src/statistics/utils.clj: -------------------------------------------------------------------------------- 1 | (ns statistics.utils) 2 | 3 | (defn transform-obj 4 | "Convert {\" grade \": \" 12 \", \" count \": 13...} to {\" 12 \" 13...}" 5 | [obj] 6 | (into {} (map (fn [x] {(:grade x) (:count x)}) obj))) 7 | 8 | (def passing-grades ["Passed" "12" "10" "7" "4" "02"]) 9 | (def failing-grades ["00" "-3" "Failed" "Absent"]) 10 | (def grade-steps ["12" "10" "7" "4" "02" "00" "-3"]) 11 | 12 | (defn grade-repeats 13 | "Repeat the grade-steps the number of times they appear in the exam-table, 14 | for instance if 7 appears 3 times, we repeat 7 three times, this is a hack to calculate statistics" 15 | [exam-table] 16 | (let [transformed (transform-obj exam-table) 17 | grades (select-keys transformed grade-steps)] 18 | (apply concat (map (fn [x] (repeat (transformed x) (Integer/parseInt x))) (keys grades))))) 19 | 20 | ; if the sum of all the 7 grades is 0 then we can assume the course is a pass/fail course 21 | ; and not a graded course, some pass/fail courses have the 7 grades in them as all zeros 22 | (defn is-pass-fail? 
[exam-table] 23 | ; select the grades from the exam table that are in the 7 step scale 24 | (let [grades (select-keys (transform-obj exam-table) grade-steps)] 25 | ; some weird courses like LNAK10082E have a single graded thing and otherwise pass 26 | (> 5 (apply + (vals grades))))) 27 | 28 | (defn total [exam-table] 29 | (apply + (vals (transform-obj exam-table)))) 30 | 31 | (defn pass-total [exam-table] 32 | (let [grades (select-keys (transform-obj exam-table) passing-grades)] 33 | (apply + (vals grades)))) 34 | 35 | (defn fail-total [exam-table] 36 | (let [grades (select-keys (transform-obj exam-table) failing-grades)] 37 | (apply + (vals grades)))) 38 | 39 | (defn pass-rate [exam-table] 40 | (let [total-pass (pass-total exam-table) 41 | total-fail (fail-total exam-table)] 42 | (/ total-pass (+ total-pass total-fail)))) 43 | 44 | ; Median of the numeric 7-step grades; returns the middle grade for an odd count, 45 | ; the average of the two middle grades (possibly a ratio) for an even count. 46 | (defn median [exam-table] 47 | (let [sorted-grades (sort (grade-repeats exam-table)) 48 | total-count (count sorted-grades)] 49 | ; letfn keeps the index helper local; the previous nested defn re-def'ed a 50 | ; namespace-level var on every call (not thread-safe). quot gives an integer 51 | ; index; (/ odd-count 2) would produce a Ratio where an index is expected. 52 | (letfn [(nth-elem [n] (nth sorted-grades n))] 53 | (if (odd? total-count) 54 | (nth-elem (quot total-count 2)) 55 | (/ (+ (nth-elem (quot total-count 2)) (nth-elem (dec (quot total-count 2)))) 2))))) 56 | 57 | (defn stats-pass-fail [exam-table] 58 | {:pass-rate-w-absent (pass-rate exam-table) 59 | 60 | ; this calculates the pass-rate without the absent students 61 | ; (it will be higher than the pass-rate with absent students) 62 | :pass-rate (pass-rate (filter (fn [x] (not= (:grade x) "Absent")) exam-table)) 63 | :total (total exam-table) 64 | :pass (pass-total exam-table) 65 | :fail (fail-total exam-table) 66 | :absent ((transform-obj exam-table) "Absent")}) 67 | 68 | (defn squared-diff [x mean] 69 | (* (- x mean) (- x mean))) 70 | 71 | (defn stats-graded [exam-table] 72 | (let [repeats (grade-repeats exam-table) 73 | sum (reduce + repeats) 74 | total (count repeats) 75 | mean (/ sum total) 76 | var (/ (reduce + (map (fn [x] (squared-diff x mean)) repeats)) (- total 1))] 77 | {:mean mean 78 | :median (median exam-table) 79 | :var var 80 | :grades exam-table})) 81 | 82 | (defn stats [exam-table] 83 | (if (is-pass-fail? exam-table) 84 | (assoc (stats-pass-fail exam-table) :graded false) 85 | (assoc (merge (stats-pass-fail exam-table) (stats-graded exam-table)) :graded true))) 86 | -------------------------------------------------------------------------------- /frontend/src/components/Changelog/ChangelogModal.svelte: -------------------------------------------------------------------------------- 1 | 67 | 68 | {#if $modalStore} 69 | 73 |
76 |
79 |

Changelog

80 | 83 |
84 |
    85 | {#each changelogItems.reverse() as { date, changes }} 86 |
  • 87 |

    {date}

    88 |
      89 | {#each changes as change} 90 |
    • {change}
    • 91 | {/each} 92 |
    93 |
  • 94 | {/each} 95 |
96 |
97 |
98 | {/if} 99 | -------------------------------------------------------------------------------- /frontend/src/course.ts: -------------------------------------------------------------------------------- 1 | // TYPES FOR COURSE 2 | // TODO: make workload an enum 3 | export type Workload = { 4 | hours: number; 5 | type: string; 6 | }; 7 | export type Employee = { 8 | full_name: string; 9 | email: string; 10 | }; 11 | export type Schedule = { 12 | type: string; 13 | }; 14 | 15 | export type Block = { 16 | type: string; 17 | }; 18 | 19 | export type Language = { 20 | name: string; 21 | }; 22 | 23 | export type Description = { 24 | // TODO: rename type and string since it is a reserved keyword 25 | type: string; 26 | string: string; 27 | }; 28 | 29 | export type Exam = { 30 | duration: number; 31 | type: string; 32 | }; 33 | 34 | export type Degree = { 35 | type: string; 36 | }; 37 | 38 | export type Department = { 39 | name: string; 40 | }; 41 | 42 | export type Faculty = { 43 | name: string; 44 | }; 45 | 46 | export type Coordinator = { 47 | name: string; 48 | email: string; 49 | }; 50 | 51 | export type Grade = { 52 | grade: string; 53 | count: number; 54 | }; 55 | 56 | export type Statistics = { 57 | grades: Grade[]; 58 | fail: number; 59 | mean: number; 60 | median: number; 61 | pass: number; 62 | absent: number; 63 | "pass-rate": number; 64 | total: number; 65 | }; 66 | 67 | export type Course = { 68 | department: Department[]; 69 | schedule: Schedule[]; 70 | block: Block[]; 71 | content: string; 72 | "learning-outcome": string; 73 | duration: string; 74 | faculty: Faculty[]; 75 | title: string; 76 | statistics: Statistics | null; 77 | ects: number; 78 | coordinator: Coordinator[]; 79 | language: Language[]; 80 | exam: Exam[]; 81 | id: string; 82 | degree: Degree[]; 83 | "recommended-qualifications": string; 84 | workload: Workload[]; 85 | }; 86 | 87 | export const empty_course: Course = { 88 | department: [], 89 | schedule: [], 90 | block: [], 91 | content: "", 
92 | "learning-outcome": "", 93 | duration: "", 94 | faculty: [], 95 | title: "", 96 | statistics: null, 97 | ects: 0, 98 | coordinator: [], 99 | language: [], 100 | exam: [], 101 | id: "", 102 | degree: [], 103 | "recommended-qualifications": "", 104 | workload: [], 105 | }; 106 | 107 | export function total_hours(course: Course): number { 108 | let total = 0; 109 | course.workload.forEach((workload) => { 110 | total += workload.hours; 111 | }); 112 | return total; 113 | } 114 | 115 | // Same as course but removed the employees and workloads and desc is just a string 116 | export type Overview = { 117 | schedule: Schedule[]; 118 | block: Block[]; 119 | title: string; 120 | statistics: StatisticsOverview | null; 121 | summary: string; 122 | ects: number; 123 | language: Language[]; 124 | exam: Exam[]; 125 | id: string; 126 | degree: Degree[]; 127 | }; 128 | 129 | export type StatisticsOverview = { 130 | mean: number; 131 | median: number; 132 | "pass-rate": number; 133 | }; 134 | 135 | export const empty_overview: Overview = { 136 | schedule: [], 137 | block: [], 138 | title: "", 139 | statistics: null, 140 | summary: "", 141 | ects: 0, 142 | language: [], 143 | exam: [], 144 | id: "", 145 | degree: [], 146 | }; 147 | 148 | // Type for query store 149 | export interface Filters { 150 | blocks: string[]; 151 | degrees: string[]; 152 | schedules: string[]; 153 | exams: string[]; 154 | departments: string[]; 155 | languages: string[]; 156 | search: string; 157 | } 158 | -------------------------------------------------------------------------------- /flake.lock: -------------------------------------------------------------------------------- 1 | { 2 | "nodes": { 3 | "flake-utils": { 4 | "inputs": { 5 | "systems": "systems" 6 | }, 7 | "locked": { 8 | "lastModified": 1681202837, 9 | "narHash": "sha256-H+Rh19JDwRtpVPAWp64F+rlEtxUWBAQW28eAi3SRSzg=", 10 | "owner": "numtide", 11 | "repo": "flake-utils", 12 | "rev": "cfacdce06f30d2b68473a46042957675eebb3401", 13 | "type": 
"github" 14 | }, 15 | "original": { 16 | "owner": "numtide", 17 | "repo": "flake-utils", 18 | "type": "github" 19 | } 20 | }, 21 | "flake-utils_2": { 22 | "inputs": { 23 | "systems": "systems_2" 24 | }, 25 | "locked": { 26 | "lastModified": 1681202837, 27 | "narHash": "sha256-H+Rh19JDwRtpVPAWp64F+rlEtxUWBAQW28eAi3SRSzg=", 28 | "owner": "numtide", 29 | "repo": "flake-utils", 30 | "rev": "cfacdce06f30d2b68473a46042957675eebb3401", 31 | "type": "github" 32 | }, 33 | "original": { 34 | "owner": "numtide", 35 | "repo": "flake-utils", 36 | "type": "github" 37 | } 38 | }, 39 | "nixpkgs": { 40 | "locked": { 41 | "lastModified": 1683408522, 42 | "narHash": "sha256-9kcPh6Uxo17a3kK3XCHhcWiV1Yu1kYj22RHiymUhMkU=", 43 | "owner": "NixOS", 44 | "repo": "nixpkgs", 45 | "rev": "897876e4c484f1e8f92009fd11b7d988a121a4e7", 46 | "type": "github" 47 | }, 48 | "original": { 49 | "owner": "NixOS", 50 | "ref": "nixos-unstable", 51 | "repo": "nixpkgs", 52 | "type": "github" 53 | } 54 | }, 55 | "nixpkgs_2": { 56 | "locked": { 57 | "lastModified": 1681358109, 58 | "narHash": "sha256-eKyxW4OohHQx9Urxi7TQlFBTDWII+F+x2hklDOQPB50=", 59 | "owner": "NixOS", 60 | "repo": "nixpkgs", 61 | "rev": "96ba1c52e54e74c3197f4d43026b3f3d92e83ff9", 62 | "type": "github" 63 | }, 64 | "original": { 65 | "owner": "NixOS", 66 | "ref": "nixpkgs-unstable", 67 | "repo": "nixpkgs", 68 | "type": "github" 69 | } 70 | }, 71 | "root": { 72 | "inputs": { 73 | "flake-utils": "flake-utils", 74 | "nixpkgs": "nixpkgs", 75 | "rust-overlay": "rust-overlay" 76 | } 77 | }, 78 | "rust-overlay": { 79 | "inputs": { 80 | "flake-utils": "flake-utils_2", 81 | "nixpkgs": "nixpkgs_2" 82 | }, 83 | "locked": { 84 | "lastModified": 1703384182, 85 | "narHash": "sha256-g5K8bFBCIQ3x/j/MFTpkZo4It5SGWPwhBp/lASiy+pA=", 86 | "owner": "oxalica", 87 | "repo": "rust-overlay", 88 | "rev": "cb6395cb3c2f69ad028914c90bce833e51d339c9", 89 | "type": "github" 90 | }, 91 | "original": { 92 | "owner": "oxalica", 93 | "repo": "rust-overlay", 94 | "type": "github" 
95 | } 96 | }, 97 | "systems": { 98 | "locked": { 99 | "lastModified": 1681028828, 100 | "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=", 101 | "owner": "nix-systems", 102 | "repo": "default", 103 | "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e", 104 | "type": "github" 105 | }, 106 | "original": { 107 | "owner": "nix-systems", 108 | "repo": "default", 109 | "type": "github" 110 | } 111 | }, 112 | "systems_2": { 113 | "locked": { 114 | "lastModified": 1681028828, 115 | "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=", 116 | "owner": "nix-systems", 117 | "repo": "default", 118 | "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e", 119 | "type": "github" 120 | }, 121 | "original": { 122 | "owner": "nix-systems", 123 | "repo": "default", 124 | "type": "github" 125 | } 126 | } 127 | }, 128 | "root": "root", 129 | "version": 7 130 | } 131 | -------------------------------------------------------------------------------- /backend/db-manager/src/db_manager/routes.clj: -------------------------------------------------------------------------------- 1 | (ns db-manager.routes 2 | (:require [clojure.spec.alpha :as s] 3 | [db-manager.db :refer [get-course-ids 4 | get-course-by-id 5 | get-courses]] 6 | [clojure.data.json :as json] 7 | [db-manager.cache :refer [cache]] 8 | [org.httpkit.client :as http])) 9 | 10 | ; TODO: fix code duplication, this also apperas in core.clj 11 | 12 | (def data-dir "../../data/") 13 | (def json-dir (str data-dir "json/")) 14 | (def stats-dir (str data-dir "statistics/")) 15 | 16 | (defn try-finding-stats [course-id] 17 | (try 18 | ; stats file is in stats-dir 19 | (let [stats-file (str stats-dir course-id ".json")] 20 | (json/read-str (slurp stats-file))) 21 | (catch Exception e 22 | nil))) 23 | 24 | (def ping-route 25 | ["/ping" 26 | {:name :ping 27 | :get (fn [_] 28 | {:status 200 29 | :body "pong"})}]) 30 | 31 | (defn api-routes [db] 32 | [["/get-all-course-ids" {:get {:parameters {} 33 | :responses {200 {:body 
[string?]}} 34 | :handler (fn [_] 35 | {:status 200 36 | :body (get-course-ids db)})}}] 37 | 38 | ; This route is used by the /course/:id route in the frontend, it returns a more detailed course 39 | ["/get-detailed-course-info" {:get {:parameters {:query {:id string?}} 40 | :responses {200 {:body map?}} 41 | :summary "Get a course by its id" 42 | :description "Returns a course with the given id" 43 | :handler (fn [{{{:keys [id]} :query} :parameters}] 44 | {:status 200 45 | :body (get-course-by-id db id)})}}] 46 | 47 | ; Better echo route, not used 48 | ["/echo" {:post {:parameters {:body map?} 49 | :handler (fn [request] 50 | (let [body (-> request :parameters :body)] 51 | {:status 200 52 | :body body}))}}] 53 | 54 | ; This route is used by the root route in the frontend, it returns an overview of all matching courses 55 | ; we expect a map of keys with vectors 56 | ["/find-course-overviews" {:post {:parameters {:body map?} 57 | :handler (fn [request] 58 | (let [predicates (-> request :parameters :body)] 59 | {:status 200 60 | ; make get-courses a partial without the db argument 61 | :body (let [get-courses-partial (partial get-courses db) 62 | courses (cache predicates get-courses-partial)] 63 | {:count (count courses) 64 | :courses courses})}))}}] 65 | ["/run-get-on-link" {:post {:parameters {:body map?} 66 | :handler (fn [request] 67 | (let [body (-> request :parameters :body) 68 | link (get body :link)] 69 | (println link) 70 | (let [response @(http/get link)] 71 | (if (= (:status response) 200) 72 | (let [body (:body response)] 73 | {:status 200 74 | :body body}) 75 | (do 76 | (println response) 77 | (throw (Exception. 
"Request failed")))))))}}]]) 78 | -------------------------------------------------------------------------------- /backend/db-manager/src/db_manager/core.clj: -------------------------------------------------------------------------------- 1 | (ns db-manager.core 2 | (:require [clojure.core :as c] 3 | [muuntaja.core :as m] 4 | [reitit.ring :as ring] 5 | [reitit.coercion.spec] 6 | [reitit.ring.coercion :as rrc] 7 | [reitit.ring.middleware.muuntaja :as muuntaja] 8 | [reitit.ring.middleware.parameters :as parameters] 9 | [reitit.swagger-ui :as swagger-ui] 10 | [reitit.swagger :as swagger] 11 | [org.httpkit.server :refer [run-server]] 12 | [db-manager.routes :refer [ping-route api-routes]] 13 | [db-manager.db :refer [schema]] 14 | [course-scraper.watcher :refer [sitemap-watcher scrape-course]] 15 | [statistics.core :refer [stats-watcher]] 16 | [ring.middleware.cors :refer [wrap-cors]] 17 | [io.staticweb.rate-limit.storage :as storage] 18 | [io.staticweb.rate-limit.middleware :refer [wrap-rate-limit ip-rate-limit]] 19 | [datascript.core :as d]) 20 | (:gen-class)) 21 | 22 | (def conn (d/create-conn schema)) 23 | 24 | (def storage (storage/local-storage)) 25 | 26 | ; limit each IP to 1000 api calls per hour 27 | (def limit (ip-rate-limit :limit-id 1000 (java.time.Duration/ofHours 1))) 28 | (def rate-limit-config {:storage storage :limit limit}) 29 | 30 | (def data-dir "../../data/") 31 | (def json-dir (str data-dir "new_json/")) 32 | (def pages-dir "../../data/pages") 33 | 34 | ; https://andersmurphy.com/2022/03/27/clojure-removing-namespace-from-keywords-in-response-middleware.html 35 | (defn transform-keys 36 | [t coll] 37 | (clojure.walk/postwalk (fn [x] (if (map? x) (update-keys x t) x)) coll)) 38 | 39 | (defn remove-namespace-keywords-in-response-middleware [handler & _] 40 | (fn [req] 41 | (let [resp (handler req)] 42 | (cond-> resp 43 | (comp map? 
:body) (update :body 44 | (partial transform-keys 45 | (comp keyword name))))))) 46 | 47 | (defn app [] 48 | (ring/ring-handler 49 | (ring/router 50 | [["/api/swagger.json" 51 | {:get {:no-doc true 52 | :swagger {:info {:title "KU courses backend API"} 53 | :basePath "/"} ;; prefix for all paths 54 | :handler (swagger/create-swagger-handler)}}] 55 | ["/api" {:middleware [remove-namespace-keywords-in-response-middleware]} 56 | ping-route 57 | (api-routes conn)]] 58 | {:data {:coercion reitit.coercion.spec/coercion 59 | :muuntaja m/instance 60 | ; TODO: fix the CORS middleware, it seems to not work for Chromium 61 | :middleware [[wrap-cors 62 | :access-control-allow-origin [#".*"] 63 | :access-control-allow-methods [:get :post] 64 | :access-control-allow-headers #{"accept" 65 | "accept-encoding" 66 | "accept-language" 67 | "authorization" 68 | "content-type" 69 | "origin"}] 70 | 71 | #(wrap-rate-limit % rate-limit-config) 72 | parameters/parameters-middleware 73 | muuntaja/format-middleware 74 | rrc/coerce-exceptions-middleware 75 | rrc/coerce-request-middleware 76 | rrc/coerce-response-middleware]}}) 77 | (ring/routes 78 | (swagger-ui/create-swagger-ui-handler {:path "/api" 79 | :url "/api/swagger.json"}) 80 | (ring/create-default-handler)))) 81 | 82 | (def main-config {:port 3000}) 83 | (defn -main [& args] 84 | ; concurrently run sitemap-watcher scrape-course and stats-watcher so that they don't block the server 85 | (future (sitemap-watcher scrape-course conn)) 86 | ; catch any potential errors and print them from the stats-watcher 87 | (future (try 88 | (stats-watcher) 89 | (catch Exception e 90 | (println e)))) 91 | 92 | (println "Starting server on port " (:port main-config)) 93 | (run-server (app) main-config)) 94 | -------------------------------------------------------------------------------- /frontend/src/components/GradeGraph/GradeGraph.svelte: -------------------------------------------------------------------------------- 1 | 107 | 108 |
109 | 110 |
111 | -------------------------------------------------------------------------------- /backend/rust_parser/src/parser/exam_information.rs: -------------------------------------------------------------------------------- 1 | use crate::parser::Exam; 2 | use anyhow::{bail, ensure, Context, Result}; 3 | 4 | use tl::{NodeHandle, VDom}; 5 | 6 | pub fn parse_course_exams(dom: &VDom) -> Result> { 7 | let parser = dom.parser(); 8 | let exam_table = dom 9 | .get_element_by_id("course-exams1") 10 | .context("Unable to find exam table, this should never happen??? i think?")? 11 | .get(parser) 12 | .unwrap() 13 | .as_tag() 14 | .unwrap(); 15 | 16 | let dts = exam_table 17 | .query_selector(parser, "dt") 18 | .context("Unable to find any dts, this should be impossible")?; 19 | let dds = exam_table 20 | .query_selector(parser, "dd") 21 | .context("Unable to find any dds, this should be impossible")?; 22 | 23 | ensure!( 24 | dds.clone().count() == dts.clone().count(), 25 | "Number of dds and dts in exam table does not match" 26 | ); 27 | 28 | let mut exams = Vec::::new(); 29 | for (dt, dd) in dts.zip(dds) { 30 | let dt_text = dt.get(parser).unwrap().inner_text(parser).to_string(); 31 | match dt_text.as_str() { 32 | "Type of assessment" | "Prøveform" => { 33 | let exam_boundary = dd 34 | .get(parser) 35 | .unwrap() 36 | .children() 37 | .unwrap() 38 | .boundaries(parser) 39 | .unwrap(); 40 | for j in exam_boundary.0..exam_boundary.1 { 41 | let text = NodeHandle::new(j).get(parser).unwrap().inner_text(parser); 42 | exams.push(parse_text_to_exam(&text)?); 43 | } 44 | ensure!( 45 | !exams.is_empty(), 46 | format!( 47 | "No exams found in exam table: {}", 48 | dd.get(parser).unwrap().inner_text(parser) 49 | ) 50 | ); 51 | } 52 | _ => continue, 53 | } 54 | } 55 | if exams.len() > 1 && exams[0] == exams[1] { 56 | exams.remove(0); 57 | } 58 | Ok(exams) 59 | } 60 | 61 | fn parse_text_to_exam(text: &str) -> Result { 62 | let split = text.split(", ").collect::>(); 63 | let exam_minutes = 
if split.clone().len() == 1 { 64 | None 65 | } else { 66 | // the first chars in split[1] are a duration in numbers 67 | let number = split[1] 68 | .chars() 69 | // take while numeric or a dot 70 | .take_while(|c| c.is_numeric() || *c == '.') 71 | .collect::() 72 | .parse::() 73 | // convert error to Nothing type as number is an option type 74 | .ok(); 75 | 76 | let factor = match split[1] { 77 | _ if split[1].contains("min") => Some(1), 78 | _ if split[1].contains("hour") || split[1].contains("time") => Some(60), 79 | _ if split[1].contains("day") || split[1].contains("dag") => Some(60 * 24), 80 | _ => None, 81 | }; 82 | match (number, factor) { 83 | (None, _) => None, 84 | (_, None) => None, 85 | (Some(number), Some(factor)) => Some((number * factor as f32) as u32), 86 | } 87 | }; 88 | 89 | let exam_name = split[0].to_lowercase().to_string(); 90 | match exam_name { 91 | _ if exam_name.contains("aflevering") || exam_name.contains("assignment") => { 92 | Ok(Exam::Assignment(exam_minutes)) 93 | } 94 | _ if exam_name.contains("skriftlig prøve") 95 | || exam_name.contains("skriftlig stedprøve") 96 | || exam_name.contains("written exam") => 97 | { 98 | Ok(Exam::Written(exam_minutes)) 99 | } 100 | _ if exam_name.contains("mundtlig prøve") 101 | || exam_name.contains("mundtligt forsvar") 102 | || exam_name.contains("oral exam") => 103 | { 104 | Ok(Exam::Oral(exam_minutes)) 105 | } 106 | _ if exam_name.contains("portfolio") 107 | || exam_name.contains("other") 108 | || exam_name.contains("andet") => 109 | { 110 | Ok(Exam::Other) 111 | } 112 | _ if exam_name.contains("løbende bedømmelse") 113 | || exam_name.contains("continuous assessment") => 114 | { 115 | Ok(Exam::ContinuousAssessment) 116 | } 117 | _ => bail!("Not implemented for exam type: {}", split[0]), 118 | } 119 | } 120 | -------------------------------------------------------------------------------- /backend/rust_parser/Cargo.lock: -------------------------------------------------------------------------------- 
1 | # This file is automatically @generated by Cargo. 2 | # It is not intended for manual editing. 3 | version = 3 4 | 5 | [[package]] 6 | name = "aho-corasick" 7 | version = "1.1.1" 8 | source = "registry+https://github.com/rust-lang/crates.io-index" 9 | checksum = "ea5d730647d4fadd988536d06fecce94b7b4f2a7efdae548f1cf4b63205518ab" 10 | dependencies = [ 11 | "memchr", 12 | ] 13 | 14 | [[package]] 15 | name = "anyhow" 16 | version = "1.0.75" 17 | source = "registry+https://github.com/rust-lang/crates.io-index" 18 | checksum = "a4668cab20f66d8d020e1fbc0ebe47217433c1b6c8f2040faf858554e394ace6" 19 | 20 | [[package]] 21 | name = "diff" 22 | version = "0.1.13" 23 | source = "registry+https://github.com/rust-lang/crates.io-index" 24 | checksum = "56254986775e3233ffa9c4d7d3faaf6d36a2c09d30b20687e9f88bc8bafc16c8" 25 | 26 | [[package]] 27 | name = "itoa" 28 | version = "1.0.9" 29 | source = "registry+https://github.com/rust-lang/crates.io-index" 30 | checksum = "af150ab688ff2122fcef229be89cb50dd66af9e01a4ff320cc137eecc9bacc38" 31 | 32 | [[package]] 33 | name = "memchr" 34 | version = "2.6.4" 35 | source = "registry+https://github.com/rust-lang/crates.io-index" 36 | checksum = "f665ee40bc4a3c5590afb1e9677db74a508659dfd71e126420da8274909a0167" 37 | 38 | [[package]] 39 | name = "pretty_assertions" 40 | version = "1.4.0" 41 | source = "registry+https://github.com/rust-lang/crates.io-index" 42 | checksum = "af7cee1a6c8a5b9208b3cb1061f10c0cb689087b3d8ce85fb9d2dd7a29b6ba66" 43 | dependencies = [ 44 | "diff", 45 | "yansi", 46 | ] 47 | 48 | [[package]] 49 | name = "proc-macro2" 50 | version = "1.0.68" 51 | source = "registry+https://github.com/rust-lang/crates.io-index" 52 | checksum = "5b1106fec09662ec6dd98ccac0f81cef56984d0b49f75c92d8cbad76e20c005c" 53 | dependencies = [ 54 | "unicode-ident", 55 | ] 56 | 57 | [[package]] 58 | name = "quote" 59 | version = "1.0.33" 60 | source = "registry+https://github.com/rust-lang/crates.io-index" 61 | checksum = 
"5267fca4496028628a95160fc423a33e8b2e6af8a5302579e322e4b520293cae" 62 | dependencies = [ 63 | "proc-macro2", 64 | ] 65 | 66 | [[package]] 67 | name = "regex" 68 | version = "1.9.6" 69 | source = "registry+https://github.com/rust-lang/crates.io-index" 70 | checksum = "ebee201405406dbf528b8b672104ae6d6d63e6d118cb10e4d51abbc7b58044ff" 71 | dependencies = [ 72 | "aho-corasick", 73 | "memchr", 74 | "regex-automata", 75 | "regex-syntax", 76 | ] 77 | 78 | [[package]] 79 | name = "regex-automata" 80 | version = "0.3.9" 81 | source = "registry+https://github.com/rust-lang/crates.io-index" 82 | checksum = "59b23e92ee4318893fa3fe3e6fb365258efbfe6ac6ab30f090cdcbb7aa37efa9" 83 | dependencies = [ 84 | "aho-corasick", 85 | "memchr", 86 | "regex-syntax", 87 | ] 88 | 89 | [[package]] 90 | name = "regex-syntax" 91 | version = "0.7.5" 92 | source = "registry+https://github.com/rust-lang/crates.io-index" 93 | checksum = "dbb5fb1acd8a1a18b3dd5be62d25485eb770e05afb408a9627d14d451bae12da" 94 | 95 | [[package]] 96 | name = "rust_parser" 97 | version = "1.0.0" 98 | dependencies = [ 99 | "anyhow", 100 | "pretty_assertions", 101 | "regex", 102 | "serde", 103 | "serde_json", 104 | "tl", 105 | ] 106 | 107 | [[package]] 108 | name = "ryu" 109 | version = "1.0.15" 110 | source = "registry+https://github.com/rust-lang/crates.io-index" 111 | checksum = "1ad4cc8da4ef723ed60bced201181d83791ad433213d8c24efffda1eec85d741" 112 | 113 | [[package]] 114 | name = "serde" 115 | version = "1.0.188" 116 | source = "registry+https://github.com/rust-lang/crates.io-index" 117 | checksum = "cf9e0fcba69a370eed61bcf2b728575f726b50b55cba78064753d708ddc7549e" 118 | dependencies = [ 119 | "serde_derive", 120 | ] 121 | 122 | [[package]] 123 | name = "serde_derive" 124 | version = "1.0.188" 125 | source = "registry+https://github.com/rust-lang/crates.io-index" 126 | checksum = "4eca7ac642d82aa35b60049a6eccb4be6be75e599bd2e9adb5f875a737654af2" 127 | dependencies = [ 128 | "proc-macro2", 129 | "quote", 130 | "syn", 131 | 
]

[[package]]
name = "serde_json"
version = "1.0.107"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6b420ce6e3d8bd882e9b243c6eed35dbc9a6110c9769e74b584e0d68d1f20c65"
dependencies = [
 "itoa",
 "ryu",
 "serde",
]

[[package]]
name = "syn"
version = "2.0.38"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e96b79aaa137db8f61e26363a0c9b47d8b4ec75da28b7d1d614c2303e232408b"
dependencies = [
 "proc-macro2",
 "quote",
 "unicode-ident",
]

[[package]]
name = "tl"
version = "0.7.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d5e993a1c7c32fdf90a308cec4d457f507b2573acc909bd6e7a092321664fdb3"

[[package]]
name = "unicode-ident"
version = "1.0.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b"

[[package]]
name = "yansi"
version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "09041cd90cf85f7f8b2df60c646f853b7f535ce68f85244eb6731cf89fa498ec"
--------------------------------------------------------------------------------
/backend/rust_parser/src/parser/logistic_information.rs:
--------------------------------------------------------------------------------
use crate::parser;
use crate::parser::LogisticInformation;
use anyhow::{bail, ensure, Result};
use tl::{NodeHandle, VDom};

/// Convert a two-character hex string (e.g. "6a") into the byte it encodes.
///
/// Returns `None` when the input is shorter than two characters or contains
/// non-hex digits; the previous version `unwrap()`ed and panicked on such
/// input instead of letting the caller report a proper error.
fn double_hex_to_u8(hex: &str) -> Option<u8> {
    let mut chars = hex.chars();
    let high = chars.next()?.to_digit(16)? as u8;
    let low = chars.next()?.to_digit(16)? as u8;
    Some((high << 4) | low)
}

/// Recover a coordinator e-mail from KU's obfuscation scheme.
///
/// Obfuscated addresses look like `<junk>-<hex payload>`: every byte of the
/// real address is shifted up by a small constant before hex-encoding. The
/// constant is not published, so we brute-force offsets 0..25 and accept the
/// first decoding that matches an e-mail-shaped regex. Strings without a `-`
/// are assumed to be plain addresses and are returned unchanged.
///
/// # Errors
/// Fails when the payload is malformed (odd length or non-hex characters) or
/// when no offset yields something that looks like an e-mail.
fn deobfuscate_email(obfuscated_email: &str) -> Result<String> {
    // No '-' separator means the address is not obfuscated at all.
    if !obfuscated_email.contains('-') {
        return Ok(obfuscated_email.to_string());
    }
    // The hex payload is everything after the last '-'.
    let text = obfuscated_email.rsplit('-').next().unwrap(); // safe: '-' is present
    ensure!(
        text.len() % 2 == 0,
        "Odd-length hex payload in obfuscated email: {}",
        obfuscated_email
    );

    let regex = regex::Regex::new(r"(.+@.+\..+)").unwrap();
    let mut email = String::new();
    for offset in 0..25u8 {
        email.clear();
        for pair in text.as_bytes().chunks_exact(2) {
            let byte = std::str::from_utf8(pair)
                .ok()
                .and_then(double_hex_to_u8)
                .ok_or_else(|| {
                    anyhow::anyhow!("Non-hex payload in obfuscated email: {}", obfuscated_email)
                })?;
            // wrapping_sub: an over-large offset must not panic in debug
            // builds; the resulting garbage simply fails the regex below.
            email.push(byte.wrapping_sub(offset) as char);
        }
        if regex.is_match(&email) {
            return Ok(email);
        }
    }
    bail!("Unable to deobfuscate email: {}", obfuscated_email)
}

/// Parse departments, faculty and course coordinators out of the course page
/// DOM.
///
/// # Errors
/// Fails when a coordinator entry is malformed, the faculty is unknown or
/// missing, or no department can be found.
pub fn parse_logistic_info(dom: &VDom) -> Result<LogisticInformation> {
    // Extract the (h5 heading, list items) pairs from the dom.
    let info: Vec<(String, Vec<String>)> = extract_h5_li_pairs(dom)?;

    let mut departments: Vec<parser::Department> = vec![];
    let mut coordinators: Vec<parser::Coordinator> = vec![];
    let mut faculty: Option<parser::Faculty> = None;

    for (h5, lis) in &info {
        match h5.as_str() {
            "Kursusansvarlige" | "Course Coordinators" => {
                for li in lis {
                    // Entries look like "Name (obfuscated-email)"; split the
                    // parenthesised e-mail off the name. A missing '(' used to
                    // panic here; it is now reported as a parse error.
                    let mut split = li.split('(');
                    let name = split.next().unwrap().trim().to_string(); // split yields >= 1 item
                    let obfuscated_email = split
                        .next()
                        .ok_or_else(|| {
                            anyhow::anyhow!("Coordinator entry without e-mail: {}", li)
                        })?
                        .split(')')
                        .next()
                        .unwrap()
                        .to_string();
                    let email = deobfuscate_email(&obfuscated_email)?;
                    coordinators.push(parser::Coordinator { name, email });
                }
            }
            "Udbydende fakultet" | "Contracting faculty" => {
                let faculty_str = lis
                    .first()
                    .ok_or_else(|| anyhow::anyhow!("Empty faculty section: {}", h5))?;
                match faculty_str.as_str() {
                    "Det Natur- og Biovidenskabelige Fakultet" | "Faculty of Science" => {
                        faculty = Some(parser::Faculty::Science)
                    }
                    _ => bail!("Unknown faculty: {} ", faculty_str),
                }
            }
            _ if h5.contains("institut") || h5.contains("department") => {
                for li in lis {
                    departments.push(parser::Department::from_str(li)?);
                }
            }
            _ => {}
        }
    }
    // ensure we have at least one department
    ensure!(
        !departments.is_empty(),
        format!("No departments found in logistic information: {:?}", info)
    );
    // Previously `faculty.unwrap()` — a course page without a faculty section
    // now yields a proper error instead of panicking the whole parser run.
    let faculty = faculty.ok_or_else(|| {
        anyhow::anyhow!("No faculty found in logistic information: {:?}", info)
    })?;

    Ok(parser::LogisticInformation {
        departments,
        faculty,
        coordinators,
    })
}

pub fn extract_h5_li_pairs(dom: &VDom) -> Result<Vec<(String, Vec<String>)>> {
    let parser = dom.parser();

    let raw_panel_bodies = dom.get_elements_by_class_name("panel-body");
    let panel_bodies =
        raw_panel_bodies.map(|panel_body| panel_body.get(parser).unwrap().as_tag().unwrap());

    let mut pairs: Vec<(String, Vec<String>)> = vec![];
    for panel_body in panel_bodies {
        let h5s = panel_body.query_selector(parser, "h5").unwrap();
        // if it contains
h5s, we have found the right body 107 | if h5s.clone().count() > 0 { 108 | for h5 in h5s { 109 | let h5_text = h5.get(parser).unwrap().inner_text(parser).to_string(); 110 | 111 | let inner_handle = h5.get_inner(); // This is the handle to the h5 tag 112 | 113 | // by magic we know that offsetting by 2 gives us the ul tag 114 | let ul_handle = NodeHandle::new(inner_handle + 2).get(parser).unwrap(); 115 | 116 | // get the chldren inside of next_sibling 117 | let mut children = vec![]; 118 | for child in ul_handle.as_tag().unwrap().children().top().iter() { 119 | let child_text = child.get(parser).unwrap().inner_text(parser).to_string(); 120 | children.push(child_text); 121 | } 122 | pairs.push((h5_text, children)); 123 | } 124 | } 125 | } 126 | Ok(pairs) 127 | } 128 | -------------------------------------------------------------------------------- /backend/vector_store/src/embedding.rs: -------------------------------------------------------------------------------- 1 | use super::{Coordinator, Course}; 2 | 3 | use anyhow::Result; 4 | use async_stream::stream; 5 | use fastembed::{Embedding, EmbeddingModel, InitOptions, TextEmbedding}; 6 | use futures_core::stream::Stream; 7 | use rayon::prelude::*; 8 | 9 | const BATCH_SIZE: usize = 32; 10 | 11 | /// Embedding for a course 12 | #[derive(Clone)] 13 | pub struct CourseEmbedding { 14 | pub id: String, 15 | pub title: Embedding, 16 | pub content: Embedding, 17 | } 18 | 19 | /// Embedding for a coordinator 20 | #[derive(Clone)] 21 | pub struct CoordinatorEmbedding { 22 | pub email: String, 23 | pub name: Embedding, 24 | } 25 | 26 | /// Embedder for courses and coordinators 27 | pub struct Embedder { 28 | pub model: TextEmbedding, 29 | } 30 | 31 | impl Embedder { 32 | pub fn new() -> Self { 33 | let model: TextEmbedding = 34 | TextEmbedding::try_new(InitOptions::new(EmbeddingModel::AllMiniLML12V2Q)) 35 | .expect("Failed to load model, please check your internet connection"); 36 | Self { model } 37 | } 38 | 39 | /// Embeds a 
Vec into course embeddings 40 | /// This returns an asynchronous stream of CourseEmbedding 41 | pub fn embed_courses( 42 | &self, 43 | documents: Vec, 44 | ) -> impl Stream + '_ { 45 | stream! { 46 | for batch in documents.chunks(BATCH_SIZE) { 47 | let embedded_courses = embed_course_batch(batch.to_vec(), &self.model).expect("Failed to embed courses, this should not happen"); 48 | for embedded_course in embedded_courses.iter().cloned() { 49 | yield embedded_course; 50 | } 51 | println!("Embedded batch of courses"); 52 | } 53 | } 54 | } 55 | 56 | /// Embeds a Vec into coordinator embeddings 57 | /// This returns an asynchronous stream of CoordinatorEmbedding 58 | pub fn embed_coordinators( 59 | &self, 60 | coordinators: Vec, 61 | ) -> impl Stream + '_ { 62 | stream! { 63 | for batch in coordinators.chunks(BATCH_SIZE) { 64 | let model = &self.model; 65 | let embedded_coordinators = embed_coordinator_batch( 66 | batch.to_vec(), 67 | model 68 | ).expect("Failed to embed coordinators, this should not happen"); 69 | for embedded_coordinator in embedded_coordinators.iter().cloned() { 70 | yield embedded_coordinator; 71 | } 72 | println!("Embedded batch of coordinators"); 73 | } 74 | } 75 | } 76 | 77 | // Embeds a query into an embedding 78 | // This returns an Embedding 79 | pub fn embed_query(&self, query: String) -> Embedding { 80 | query_embed(&query, &self.model).expect("Failed to embed query, this should not happen") 81 | } 82 | } 83 | 84 | /// Helper function to embed a batch of courses 85 | /// This returns a Vec 86 | fn embed_course_batch(courses: Vec, model: &TextEmbedding) -> Result> { 87 | let batch_size = Some(32); 88 | 89 | let embedded_titles = passage_embed( 90 | courses.par_iter().map(|x| x.title.clone()).collect(), 91 | model, 92 | batch_size, 93 | )?; 94 | 95 | let embedded_descriptions = passage_embed( 96 | courses.par_iter().map(|x| x.content.clone()).collect(), 97 | model, 98 | batch_size, 99 | )?; 100 | 101 | let embedded_courses: Vec = courses 102 | 
.iter() 103 | .cloned() 104 | .zip(embedded_titles.to_vec()) 105 | .zip(embedded_descriptions.to_vec()) 106 | .map(|((course, title), content)| CourseEmbedding { 107 | id: course.id, 108 | title, 109 | content, 110 | }) 111 | .collect(); 112 | 113 | Ok(embedded_courses) 114 | } 115 | 116 | /// Helper function to embed a batch of coordinators 117 | /// This returns a Vec 118 | fn embed_coordinator_batch( 119 | coordinators: Vec, 120 | model: &TextEmbedding, 121 | ) -> Result> { 122 | let name_embeddings = passage_embed( 123 | coordinators.iter().map(|x| x.name.clone()).collect(), 124 | model, 125 | Some(32), 126 | )?; 127 | 128 | let coordinator_embeddings: Vec = coordinators 129 | .iter() 130 | .cloned() 131 | .zip(name_embeddings.iter().cloned()) 132 | .map(|(coordinator, name_embedding)| CoordinatorEmbedding { 133 | email: coordinator.email, 134 | name: name_embedding, 135 | }) 136 | .collect(); 137 | Ok(coordinator_embeddings) 138 | } 139 | 140 | /// Helper function to embed a list of passages 141 | /// Passages are prepended with "passage: " before being embedded 142 | /// This returns a Vec 143 | fn passage_embed( 144 | passages: Vec, 145 | model: &TextEmbedding, 146 | batch_size: Option, 147 | ) -> Result> { 148 | // for each passage, add passage: to the front of it 149 | let passages: Vec = passages 150 | .par_iter() 151 | .map(|x| format!("passage: {x}")) 152 | .collect(); 153 | model.embed(passages, batch_size) 154 | } 155 | 156 | /// Helper function to embed a query 157 | /// The query is prepended with "query: " before being embedded 158 | /// This returns an Embedding 159 | fn query_embed(query: &str, model: &TextEmbedding) -> Result { 160 | // add query: to the front of the query 161 | model 162 | .embed(vec![format!("query: {query}")], None) 163 | .map(|x| x[0].clone()) 164 | } 165 | -------------------------------------------------------------------------------- /backend/vector_store/src/main.rs: 
-------------------------------------------------------------------------------- 1 | use axum::extract::Query; 2 | use axum::extract::State; 3 | use axum::routing::get; 4 | use axum::{Json, Router}; 5 | use futures_util::pin_mut; 6 | use futures_util::stream::StreamExt; 7 | use serde::Deserialize; 8 | use sqlx::migrate; 9 | use std::env; 10 | use std::path::Path; 11 | use std::sync::Arc; 12 | 13 | mod db; 14 | use db::PostgresDB; 15 | 16 | mod populate; 17 | use populate::upsert_documents_from_path; 18 | 19 | mod embedding; 20 | use embedding::Embedder; 21 | 22 | #[derive(Clone)] 23 | struct Course { 24 | id: String, 25 | title: String, 26 | content: String, 27 | } 28 | 29 | #[derive(Debug, Deserialize, Clone)] 30 | struct Coordinator { 31 | name: String, 32 | email: String, 33 | } 34 | 35 | #[derive(Clone)] 36 | struct AppState { 37 | db: Arc, 38 | embedder: Arc, 39 | } 40 | 41 | #[derive(Debug, Deserialize)] 42 | struct SearchQuery { 43 | query: String, 44 | } 45 | 46 | /// Main function that starts the server 47 | /// This function initializes the database, runs the migrations, and starts the server 48 | /// A temporary functionality this server has is to populate the database with the documents 49 | /// in the new_json directory, this will be removed in the future 50 | /// The server has two endpoints: 51 | /// - /health: returns "healthy" if the server is running 52 | /// - /search: returns a list of course ids that most closely match the query 53 | /// 54 | /// The server also has two background tasks that run every 6 hours: 55 | /// - populate_coordinator_embeddings: updates the coordinator embeddings in the database 56 | /// - populate_course_embeddings: updates the course embeddings in the database 57 | /// These tasks use the embedder to generate the embeddings 58 | #[tokio::main] 59 | async fn main() { 60 | let conn_string = env::var("POSTGRES_URL").expect( 61 | "POSTGRES_URL not set, it should be in the format postgres://user:password@host/db", 62 | ); 
63 | 64 | let db = PostgresDB::new(&conn_string) 65 | .await 66 | .expect("Failed to create database"); 67 | migrate!("./migrations") 68 | .run(&db.pool) 69 | .await 70 | .expect("Failed to run migrations"); 71 | 72 | let data_dir = env::var("DATA_DIR").expect("DATA_DIR not set"); 73 | let new_json_dir = data_dir.to_owned() + "new_json/"; 74 | let path = Path::new(&new_json_dir); 75 | upsert_documents_from_path(&db, path) 76 | .await 77 | .expect("Failed to upsert documents from path into database"); 78 | 79 | let state = AppState { 80 | db: Arc::new(db), 81 | embedder: Arc::new(Embedder::new()), 82 | }; 83 | 84 | const SYNC_INTERVAL: u64 = 60 * 60 * 6; 85 | 86 | let coordinator_state = state.clone(); 87 | tokio::spawn(async move { 88 | loop { 89 | populate_coordinator_embeddings(&coordinator_state.db, &coordinator_state.embedder) 90 | .await; 91 | println!("done populating coordinator embeddings"); 92 | tokio::time::sleep(tokio::time::Duration::from_secs(SYNC_INTERVAL)).await; 93 | } 94 | }); 95 | 96 | let course_state = state.clone(); 97 | tokio::spawn(async move { 98 | loop { 99 | populate_course_embeddings(&course_state.db, &course_state.embedder).await; 100 | println!("done populating course embeddings"); 101 | tokio::time::sleep(tokio::time::Duration::from_secs(SYNC_INTERVAL)).await; 102 | } 103 | }); 104 | 105 | let app = Router::new() 106 | .route("/health", get(|| async { "healthy" })) 107 | .route("/search", get(search)) 108 | .with_state(state); 109 | let addr = env::var("SERVER_ADDRESS").expect("SERVER_ADDRESS must be set"); 110 | let port = env::var("SERVER_PORT").expect("SERVER_PORT must be set"); 111 | let listener = tokio::net::TcpListener::bind(&format!("{addr}:{port}")) 112 | .await 113 | .expect("Failed to bind to port"); 114 | println!("Listening on {addr}:{port}"); 115 | axum::serve(listener, app) 116 | .await 117 | .expect("Failed to start server, this should not happen"); 118 | } 119 | 120 | /// Search endpoint that takes a query parameter 
and returns a list of the course ids that 121 | /// most closely match the query 122 | async fn search( 123 | Query(query): Query, 124 | State(state): State, 125 | ) -> Json> { 126 | let query_embedding = state.embedder.embed_query(query.query); 127 | let db = &state.db; 128 | let ids = db 129 | .get_most_relevant_course_ids(&query_embedding) 130 | .await 131 | .expect("Failed to get most relevant course ids"); 132 | Json(ids) 133 | } 134 | 135 | /// Upserts the coordinator embeddings into the database using the coordinator information 136 | /// from the database and the embedder to generate the embeddings 137 | async fn populate_coordinator_embeddings(db: &PostgresDB, embedder: &Embedder) { 138 | let missing_coordinators = db 139 | .get_missing_embedding_email_names() 140 | .await 141 | .expect("Failed to get missing coordinators"); 142 | 143 | println!("missing coordinators: {}", missing_coordinators.len()); 144 | 145 | let embedding_stream = embedder.embed_coordinators(missing_coordinators); 146 | pin_mut!(embedding_stream); 147 | 148 | while let Some(embedded_coordinator) = embedding_stream.next().await { 149 | db.insert_coordinator_embedding(embedded_coordinator) 150 | .await 151 | .expect("Failed to insert coordinator embedding"); 152 | } 153 | } 154 | 155 | /// Upserts the course embeddings into the database using the course information 156 | /// from the database and the embedder to generate the embeddings 157 | async fn populate_course_embeddings(db: &PostgresDB, embedder: &Embedder) { 158 | let outdated_embeddings = db 159 | .get_outdated_embedding_course_ids() 160 | .await 161 | .expect("Failed to get outdated embeddings"); 162 | 163 | let outdated_courses: Vec = db 164 | .get_courses_by_ids(&outdated_embeddings) 165 | .await 166 | .expect("Failed to get courses by ids"); 167 | 168 | println!("missing documents: {}", outdated_courses.len()); 169 | 170 | let embedding_stream = embedder.embed_courses(outdated_courses); 171 | pin_mut!(embedding_stream); 172 
| 173 | while let Some(embedded_document) = embedding_stream.next().await { 174 | db.insert_course_embedding(embedded_document) 175 | .await 176 | .expect("Failed to insert course embedding"); 177 | } 178 | } 179 | -------------------------------------------------------------------------------- /backend/db-manager/src/course_scraper/watcher.clj: -------------------------------------------------------------------------------- 1 | (ns course-scraper.watcher 2 | (:require [clojure.zip :as zip] 3 | [clojure.xml :as xml] 4 | [clojure.java.io :as io] 5 | [org.httpkit.client :as http] 6 | [clojure.java.shell :as shell] 7 | [datascript.core :as d] 8 | [db-manager.db :refer [schema]] 9 | [exam-scraper.core :refer [get-itx-courses-from-dir patch-courses-w-itx]] 10 | [course-scraper.upsert :refer [try-finding-stats transactions-w-stats read-json-file]]) 11 | (:import (javax.net.ssl SSLEngine SSLParameters SNIHostName) 12 | (java.net URI)) 13 | 14 | (:gen-class)) 15 | 16 | (def data-dir "../../data/") 17 | (def pages-dir "../../data/pages") 18 | (def json-dir "../../data/new_json") 19 | (def exam-pdfs-dir "../../data/exam_pdfs") 20 | (def stats-dir (str data-dir "statistics/")) 21 | 22 | 23 | (defn grab-info-from-course [course] 24 | (let [content (:content course) 25 | loc (first (filter #(= (:tag %) :loc) content)) 26 | lastmod (first (filter #(= (:tag %) :lastmod) content)) 27 | ; convert YYYY-MM-DD to java.time 28 | ldt (java.time.LocalDate/parse (first (:content lastmod))) 29 | instant (.atStartOfDay ldt (java.time.ZoneId/of "Europe/Copenhagen")) 30 | timestamp (.toEpochSecond instant)] 31 | 32 | {:loc (first (:content loc)) 33 | :id (last (clojure.string/split (first (:content loc)) #"/")) 34 | :lastmod (first (:content lastmod)) 35 | :timestamp (* 1000 timestamp)})) 36 | 37 | (defn grab-mod-date 38 | "Grabs the modification date of the file with the course-id as name or 0 if it doesn't exist" 39 | [course-id] 40 | (let [file (io/file pages-dir (str course-id ".html"))] 
41 | (if (.exists file) 42 | (.lastModified file) 43 | 0))) 44 | 45 | 46 | (defn sitemap-watcher 47 | "Watches the course sitemap for last-mod newer than time" 48 | [callback conn] 49 | (let [newly-scraped (atom []) 50 | sitemap-course-ids (atom []) 51 | sitemap-url "https://kurser.ku.dk/sitemap.xml" 52 | sitemap-zipper (zip/xml-zip (xml/parse sitemap-url)) 53 | courses (-> sitemap-zipper 54 | zip/down 55 | ; skip the first element, which is the page index, then grab everything 56 | zip/right 57 | 58 | zip/rights)] 59 | 60 | 61 | ; for every course, grab mod date and check if it's newer than the file 62 | ; if it is, grab the info from the course and pass it to the callback 63 | (println "[course scraper]: Scraping courses") 64 | (doseq [course courses] 65 | (let [course-info (grab-info-from-course course) 66 | course-id (:id course-info) 67 | course-mod-date (grab-mod-date course-id) 68 | course-lastmod (:timestamp course-info)] 69 | (swap! sitemap-course-ids conj course-id) 70 | (when (> course-lastmod course-mod-date) 71 | (callback course-info newly-scraped)))) 72 | 73 | ; check if the sitemap lacks courses that are in the pages directory, if so, delete them 74 | (let [files (.list (io/file pages-dir))] 75 | (doseq [file files] 76 | (when-not (some #(= file (str % ".html")) @sitemap-course-ids) 77 | (println "[course scraper]: Deleting" file) 78 | (io/delete-file (io/file pages-dir file))))) 79 | 80 | ; go to sleep for 30 minutes and then do it again 81 | (println "[course scraper]: Finished scraping, going to sleep") 82 | (println "[course scraper]: Modified" (count @newly-scraped) "courses") 83 | 84 | (if-not (zero? 
(count @newly-scraped)) 85 | (let [result (future (shell/sh "rust_parser" pages-dir json-dir))] 86 | (println "[course parser] Running rust parser...") 87 | (println "[course parser] Parser stderr: " (:err @result)) 88 | (println "[course parser] Finished parsing courses")) 89 | 90 | (println "[course parser] No new courses, not running parser")) 91 | 92 | (println "[course scraper]: Updating database") 93 | (let [stats-finder #(try-finding-stats stats-dir %) 94 | ; FIXME: we already know whihc courses to take, this does extra work 95 | ; this currently takes all courses instead of updating the ones that are new 96 | courses (map read-json-file (drop 1 (file-seq (clojure.java.io/file json-dir)))) 97 | itx-course-ids (get-itx-courses-from-dir exam-pdfs-dir) 98 | patched-courses (patch-courses-w-itx courses itx-course-ids)] 99 | ; FIXME: this is a hack and we should just drop the workflows and exams 100 | (d/reset-conn! conn (d/empty-db schema)) 101 | 102 | (d/transact! conn (transactions-w-stats stats-finder patched-courses))) 103 | (println "[course scraper]: Finished updating database") 104 | 105 | (reset! newly-scraped []) 106 | (Thread/sleep (* 1000 60 60)) ;; 1 hour, unit is ms 107 | (recur callback conn))) 108 | 109 | ; Magical snippet of code that allows us to use SNI with http-kit 110 | ; https://kumarshantanu.medium.com/using-server-name-indication-sni-with-http-kit-client-f7d92954e165 111 | (defn sni-configure 112 | [^SSLEngine ssl-engine ^URI uri] 113 | (let [^SSLParameters ssl-params (.getSSLParameters ssl-engine)] 114 | (.setServerNames ssl-params [(SNIHostName. 
(.getHost uri))]) 115 | (.setSSLParameters ssl-engine ssl-params))) 116 | 117 | (def client (http/make-client {:ssl-configurer sni-configure})) 118 | (def options {:client client :timeout (* 1000 60 5)}) 119 | 120 | (defn scrape-course 121 | "Scrapes the course page and writes it to disk, the 300ms sleep is to avoid DOSing KU" 122 | [course newly-scraped] 123 | (let [loc (:loc course)] 124 | (println "[course scraper]: Scraping" loc) 125 | (http/get loc options 126 | (fn [{:keys [status headers body error]}] ;; asynchronous response handling 127 | (if error 128 | (println "[course scraper]: Failed, exception is " error) 129 | (do 130 | (println "[course scraper]: Writing " loc) 131 | (spit (str pages-dir "/" (:id course) ".html") body) 132 | (swap! newly-scraped conj course)))))) 133 | (Thread/sleep 300)) 134 | 135 | (defn generate-url-combinations 136 | "KU has not given us any useful API and since the exams don't always correspond to the course's block 137 | we have to generate all combinations of Summer/Winter and the years from now to 2020" 138 | [course-id] 139 | (let [base-url "https://karakterstatistik.stads.ku.dk/Histogram/"] 140 | ; generate all combinations of year from now to 2020 and semester (summer, winter) 141 | (for [year (range (.getYear (java.time.LocalDate/now)) 2020 -1) 142 | semester ["Summer" "Winter"]] 143 | (str base-url course-id "/" semester "-" year)))) 144 | -------------------------------------------------------------------------------- /backend/db-manager/src/statistics/core.clj: -------------------------------------------------------------------------------- 1 | (ns statistics.core 2 | (:import (org.jsoup Jsoup)) 3 | (:require 4 | [clojure.data.json :as json] 5 | [clojure.java.io :as io] 6 | [clojure.string :as str] 7 | [statistics.utils :refer [stats]] 8 | [clj-http.client :as client]) 9 | (:gen-class)) 10 | 11 | (def data-dir "../../data/") 12 | (def json-dir (str data-dir "new_json/")) 13 | (def out-dir (str data-dir 
"statistics/")) 14 | 15 | 16 | (defn parse-block [block] 17 | (case block 18 | "One" 1 19 | "Two" 2 20 | "Three" 3 21 | "Four" 4 22 | "Summer" 5 23 | 0)) 24 | 25 | ; take a list of blocks in strings "One", "Two", "Three", "Four" and find the smallest 26 | (defn get-first-block [blocks] 27 | (->> blocks 28 | (map parse-block) 29 | (apply min))) 30 | 31 | (defn read-json 32 | "Read a json file and return the data as a map" 33 | [file] 34 | (let [old-course (json/read-str (slurp (str json-dir file)) :key-fn keyword) 35 | temp (assoc old-course :course-id (get-in old-course [:info :id]))] 36 | (assoc temp :start-block (get-first-block (get-in old-course [:info :block]))))) 37 | 38 | 39 | (defn query-stads 40 | "This should make a POST request with a form to the stats website, this returns a html table or nil" 41 | [course-info] 42 | (let [searchText (str/trim (str/replace (:title course-info) #"\(.*\)" "")) 43 | block (str "B" (:start-block course-info)) 44 | url "https://karakterstatistik.stads.ku.dk/Search/Courses" 45 | response (client/post url {:form-params 46 | {:searchText searchText 47 | :block block 48 | ; This number corresponds to the faculty of science 49 | :faculty "1868"} 50 | })] 51 | (if (= 200 (:status response)) 52 | (:body response) 53 | (do 54 | (println "[statistics] Error fetching: " url) 55 | (println "[statistics] Status code: " (:status response)) 56 | nil)))) 57 | 58 | 59 | (defn grab-urls [query-html] ; we get a table, the second row contains two tds, the second contains several a tags) 60 | (let [table (-> (str query-html) 61 | Jsoup/parse 62 | (.getElementsByClass "searchResultTable") 63 | first 64 | (.getElementsByTag "tr"))] 65 | ; check the number of trs, if there is less than 2, we return nil 66 | (if (= 1 (count table)) 67 | nil 68 | (let [second-row (second table) 69 | tds (.getElementsByTag second-row "td")] 70 | (map (fn [a] (.attr a "href")) (.getElementsByTag (second tds) "a")))))) 71 | 72 | (println (grab-urls (query-stads {:title 
"Dyrs livsformer og funktion"}))) 73 | 74 | 75 | ; HOW TO GENERATE THE COURSE STATISTICS PAGE URL: 76 | ; start with base https://karakterstatistik.stads.ku.dk/Histogram/ 77 | ; add the course-id which also exists in each course map 78 | ; the course ID has a "U" at the end, this has to be changed to an "E" for exams 79 | ; add semester which is "Winter" or "Summer" 80 | ; add year which is the year of the exam 81 | ; EXAMPLE: Advanced Algorithms and Data Structures (AADS) 82 | ; NDAA09023U - SCIENCE 83 | ; => 84 | ; https://karakterstatistik.stads.ku.dk/Histogram/NDAA09023E/Winter-2022 85 | (defn generate-url-combinations [course-id] 86 | (let [base-url "https://karakterstatistik.stads.ku.dk/Histogram/" 87 | ; The courses end with a U, but the exams end with an E 88 | exam-name (if (= \U (last course-id)) 89 | (str/replace course-id "U" "E") 90 | course-id)] 91 | ; Generate all combinations of year from now to 2020 and semester (summer, winter) 92 | (for [year (range (.getYear (java.time.LocalDate/now)) 2020 -1) 93 | semester ["Summer" "Winter"]] 94 | {:url (str base-url exam-name "/" semester "-" year) 95 | :course-id course-id 96 | :year year}))) 97 | 98 | (defn try-scraping 99 | "Tries to scrape the given url and returns nil if it fails, 100 | if the error code is 500 it returns nil, otherwise it throws an exception" 101 | [url] 102 | (println "[statistics] Trying: " url) 103 | (try (.get (Jsoup/connect url)) 104 | (catch Exception e 105 | (let [status (.getStatusCode e)] 106 | (if (= 500 status) 107 | nil 108 | (do 109 | (println "[statistics] Error fetching: " url) 110 | (println "[statistics] Status code: " status) 111 | (throw e))))))) 112 | 113 | (defn existing-json? 
[course-info] 114 | (let [file (io/file (str out-dir (:course-id course-info) ".json"))] 115 | (if (.exists file) 116 | (let [data (json/read (io/reader file))] 117 | (if (not= (:year data) "2023") 118 | false 119 | (= (:re-exam data) nil))) 120 | true))) 121 | ; find all jsons 122 | ; TODO: refactor this since we arent using the start block anymore 123 | (def course-infos-init (for [file (file-seq (io/file json-dir)) 124 | :when (.endsWith (.getName file) ".json")] 125 | (read-json (.getName file)))) 126 | ;(def course-infos-init [{:course-id "NNEB19009U"}]) 127 | 128 | (println "number of courses: " (count course-infos-init)) 129 | 130 | ; The exams don't ever change, so we only need to fetch them once 131 | ; TODO: this should not be filtering out courses that haven't had their re-exam yet 132 | (def course-infos (filter existing-json? course-infos-init)) 133 | 134 | ; Checks for colspan tag in html, which indicates that the table contains the exam data 135 | (defn contains-colspan? [elem] 136 | (let [attributes (.attributes elem)] 137 | (= "2" (.get attributes "colspan")))) 138 | ;TODO make sure both exam and reexam data is contained in HTML 139 | (defn fetch-html [html] 140 | (filter contains-colspan? (-> (str html) 141 | Jsoup/parse 142 | (.getElementsByTag "td")))) 143 | 144 | ; Check if the exam table exists 145 | (defn empty-exam? 
[table] 146 | (not (< (count (.getElementsByTag table "td")) 3))) 147 | 148 | (defn translate-grade [grade] 149 | (case (str/lower-case grade) 150 | "ej mødt" "Absent" 151 | "ikke bestået" "Failed" 152 | "bestået" "Passed" 153 | grade)) 154 | 155 | ; The exams are stored in html tables, where each row has three columns (grade, count, percentage) 156 | ; We only grab the count and grade 157 | (defn grade-count-reducer [grades-list three-elems] 158 | (conj grades-list {:grade (translate-grade (.text (first three-elems))) 159 | :count (Integer/parseInt (.text (second three-elems)))})) 160 | 161 | (defn fetch-data [table] 162 | (if (empty-exam? table) 163 | (reduce grade-count-reducer [] (partition 3 (-> (second (.getElementsByTag table "tbody")) 164 | (.getElementsByTag "td")))) 165 | nil)) 166 | 167 | (defn add-stats [exam-table] 168 | (when-not (nil? exam-table) 169 | (stats exam-table))) 170 | 171 | (defn build-stats-json [tables] 172 | (let [exam-table (first tables) 173 | re-exam-table (second tables)] 174 | {:exam (add-stats (fetch-data exam-table)) 175 | :re-exam (add-stats (fetch-data re-exam-table))})) 176 | 177 | (defn save-exam [tables course-id year] 178 | (spit (str out-dir course-id ".json") (json/write-str (assoc tables :course_id course-id :year year)))) 179 | 180 | (defn parse-to-tables [html] 181 | (build-stats-json (fetch-html (:html html)))) 182 | 183 | (defn spit-all-to-json [exam-data-seq] 184 | (doseq [exam-data exam-data-seq] 185 | (when (some? 
exam-data) 186 | (let [course-id (:course-id exam-data) 187 | year (:year exam-data) 188 | tables (select-keys exam-data [:exam :re-exam])] 189 | (save-exam tables course-id year))))) 190 | 191 | 192 | 193 | (defn get-statistics-data 194 | "Takes a map with the course-id, year and url it if it exists, 195 | otherwise it returns nil" 196 | [course] 197 | (let [course-id (:course-id course) 198 | ;combinations (generate-url-combinations course-id)] 199 | urls (grab-urls (query-stads course)) 200 | ;FIXME: year is no longer being passed to the combinations 201 | combinations (map (fn [url] {:url url :course-id course-id}) urls) 202 | ; merge with combinations from (generate-url-combinations course-id) 203 | combinations (concat combinations (generate-url-combinations course-id))] 204 | (println "[statistics] Trying to find exam for: " course-id) 205 | (loop [combinations combinations] 206 | (when-not (empty? combinations) 207 | (let [combination (first combinations) 208 | url (:url combination) 209 | html (try-scraping url) 210 | exam-data (try (parse-to-tables {:html html}) 211 | (catch Exception e 212 | (println "[statistics] Error parsing: " url) 213 | nil))] 214 | (if (nil? (:exam exam-data)) 215 | ; Sleep 200ms to be nice to the server 216 | (do (Thread/sleep 200) 217 | (recur (rest combinations))) 218 | (do 219 | (println "[statistics] Found exam for: " course-id) 220 | (merge combination exam-data)))))))) 221 | 222 | (def exam-data-seq (for [course course-infos] 223 | (get-statistics-data course))) 224 | 225 | 226 | 227 | (defn stats-watcher 228 | [] 229 | (io/make-parents (str out-dir "anything here")) 230 | (spit-all-to-json exam-data-seq) 231 | (Thread/sleep (* 1000 60 60 24)) 232 | (recur)) 233 | -------------------------------------------------------------------------------- /frontend/src/components/OverviewCard/OverviewCard.svelte: -------------------------------------------------------------------------------- 1 | 63 | 64 | 68 |
69 |
70 |
71 |

76 | {course.title} 77 |

78 |

79 | {course.id} - SCIENCE 80 |

81 |
82 | 83 | 84 | 85 | 88 | 91 | 92 | 93 | 123 | 124 | 136 | 137 | 138 |
86 | {denest_type_maps(course.degree).join(", ")} 87 | 89 | ECTS: {course.ects} 90 |
94 | Block(s): {coerce_blocks_to_int( 95 | denest_type_maps(course.block) 96 | ) 97 | .sort((a, b) => a - b) // Ensure numeric sorting 98 | .reduce((acc: number[][], curr, index, arr) => { 99 | // Convert consecutive numbers to ranges 100 | if ( 101 | index === 0 || 102 | curr - arr[index - 1] !== 1 103 | ) { 104 | acc.push([curr]); // Start a new range 105 | } else { 106 | acc[acc.length - 1][1] = curr; // Extend the current range 107 | } 108 | return acc; 109 | }, []) 110 | .map((range) => { 111 | if (range.length === 2) { 112 | return `${range[0]}-${range[1]}`; 113 | } else if (range[0] === 5) { 114 | // Map block 5 to "Summer" 115 | return "Summer"; 116 | } else { 117 | return `${range[0]}`; 118 | } 119 | }) 120 | // Format ranges or single values 121 | .join(", ")} 122 | 125 | Group(s): {denest_type_maps(course.schedule) 126 | // TODO: actually process the string schedules instead of calling them other 127 | .map((x) => 128 | typeof x === "object" ? "Other" : x 129 | ) 130 | .map((x) => 131 | x == "OutsideOfSchedule" ? "Other" : x 132 | ) 133 | .sort() 134 | .join(", ")} 135 |
139 |
140 | 141 |

142 | {@html course.summary.length > 200 143 | ? course.summary.substring(0, 200) + "..." 144 | : course.summary} 145 |

146 |
147 |
148 |
149 | {#each course.exam as exam} 150 |

151 | {exam.type == "ContinuousAssessment" 152 | ? "Continuous Assesment" 153 | : exam.type} 154 | {#if exam.duration} 155 | ({formatExamDuration(exam.duration)}) 156 | {/if} 157 |

158 | {/each} 159 |
160 | 161 | {#if course.statistics} 162 | 163 | 164 | 165 | 168 | 169 | 178 | 179 | 180 | 181 | 186 | 187 | 188 | 191 | 197 | 198 | 199 |
166 | Pass 170 | {course.statistics["pass-rate"] == null 171 | ? "N/A" 172 | : `${ 173 | Math.round( 174 | course.statistics["pass-rate"] * 10000 175 | ) / 100 176 | }%`} 177 |
Median 182 | {course.statistics.median == null 183 | ? "N/A" 184 | : course.statistics.median} 185 |
189 | Average 190 | 192 | {course.statistics.mean == null 193 | ? "N/A" 194 | : Math.round(course.statistics.mean * 100) / 195 | 100} 196 |
200 | {:else} 201 | 202 | 203 | 204 | 207 | 208 | 209 | 210 | 211 | 212 | 213 | 214 | 215 | 218 | 219 | 220 | 221 |
205 | PassN/A
MedianN/A
216 | Average 217 | N/A
222 | {/if} 223 |
224 | 225 |
226 | {#if course.language.filter((lang) => lang.name == "Danish").length > 0} 227 | 228 | {:else} 229 | 230 | {/if} 231 |
232 |
233 | -------------------------------------------------------------------------------- /backend/vector_store/src/db.rs: -------------------------------------------------------------------------------- 1 | use super::{Coordinator, Course}; 2 | use crate::embedding::{CoordinatorEmbedding, CourseEmbedding}; 3 | use crate::populate::Document; 4 | use anyhow::Result; 5 | use pgvector::Vector; 6 | use sqlx::postgres::{PgPool, PgPoolOptions}; 7 | use sqlx::{query, Row}; 8 | 9 | pub struct PostgresDB { 10 | pub pool: PgPool, 11 | } 12 | impl PostgresDB { 13 | pub async fn new(db_url: &str) -> Result { 14 | let pool = PgPoolOptions::new() 15 | .max_connections(3) 16 | .connect(db_url) 17 | .await?; 18 | Ok(Self { pool }) 19 | } 20 | 21 | /// Returns all the course ids that have outdated or non-existent embeddings 22 | /// This is computed by checking if the course modified timestamp is greater than the last modified 23 | /// timestamp of the title embedding or the content embedding 24 | pub async fn get_outdated_embedding_course_ids(&self) -> Result> { 25 | let result = query!( 26 | "SELECT c.id 27 | FROM course c 28 | LEFT JOIN title_embedding te ON c.id = te.course_id 29 | LEFT JOIN content_embedding ce ON c.id = ce.course_id 30 | WHERE 31 | c.last_modified > COALESCE(te.last_modified, to_timestamp(0)) OR 32 | c.last_modified > COALESCE(ce.last_modified, to_timestamp(0))" 33 | ) 34 | .fetch_all(&self.pool) 35 | .await?; 36 | 37 | let mut ids: Vec = Vec::new(); 38 | for row in result { 39 | ids.push(row.id.expect("id")) 40 | } 41 | 42 | Ok(ids) 43 | } 44 | 45 | /// Returns all the coordinators in the Vec of coordinator emails that do not have an embedding 46 | pub async fn get_missing_embedding_email_names(&self) -> Result> { 47 | // Due to a weird bug, this has to not be a macroo query 48 | let result = query( 49 | "SELECT coordinator.email, coordinator.full_name 50 | FROM coordinator 51 | LEFT JOIN name_embedding ne ON coordinator.email = ne.email 52 | WHERE ne.embedding 
IS NULL", 53 | ) 54 | .fetch_all(&self.pool) 55 | .await?; 56 | 57 | let mut coordinators = Vec::new(); 58 | for row in result { 59 | coordinators.push(Coordinator { 60 | email: row.try_get("email")?, 61 | name: row.try_get("full_name")?, 62 | }); 63 | } 64 | 65 | Ok(coordinators) 66 | } 67 | 68 | /// Returns all the courses in the Vec of course ids 69 | pub async fn get_courses_by_ids(&self, ids: &[String]) -> Result> { 70 | let mut courses = Vec::new(); 71 | 72 | let result = query!( 73 | "SELECT id, title, content FROM course WHERE id = ANY($1)", 74 | &ids 75 | ) 76 | .fetch_all(&self.pool) 77 | .await?; 78 | 79 | for row in result { 80 | let course = Course { 81 | id: row.id, 82 | title: row.title, 83 | content: row.content, 84 | }; 85 | courses.push(course); 86 | } 87 | 88 | Ok(courses) 89 | } 90 | 91 | /// Inserts the document into the database 92 | /// If the document already exists, it updates the title, content, and last_modified timestamp 93 | /// This is used by populate.rs but is not strictly required 94 | /// for the search functionality 95 | /// TODO: all insertion functionality should be moved out of this service 96 | pub async fn upsert_document(&self, document: &Document) -> Result<()> { 97 | // start by checking if the Document is the same as the one in the database 98 | // if it is, do nothing 99 | let result = query!( 100 | "SELECT title, content FROM course WHERE id = $1", 101 | document.info.id 102 | ) 103 | .fetch_optional(&self.pool) 104 | .await?; 105 | 106 | if let Some(row) = result { 107 | if row.title == document.title && row.content == document.description.content { 108 | return Ok(()); 109 | } 110 | } 111 | 112 | let mut tx = self.pool.begin().await?; 113 | 114 | query!( 115 | "INSERT INTO course (id, title, content) VALUES ($1, $2, $3) 116 | ON CONFLICT(id) DO UPDATE SET title = $2, content = $3, last_modified = CURRENT_TIMESTAMP", 117 | document.info.id, 118 | document.title, 119 | document.description.content 120 | ).execute(&mut 
*tx).await?; 121 | 122 | // A coordinator may have been removed, so we need to delete all coordinators for this course 123 | query!( 124 | "DELETE FROM course_coordinator WHERE course_id = $1", 125 | document.info.id 126 | ) 127 | .execute(&mut *tx) 128 | .await?; 129 | 130 | // no conflict, if the coordinator exists do nothing 131 | for coordinator in document.logistics.coordinators.iter() { 132 | query!( 133 | "INSERT INTO coordinator (email, full_name) VALUES ($1, $2) 134 | ON CONFLICT(email) DO NOTHING", 135 | coordinator.email, 136 | coordinator.name 137 | ) 138 | .execute(&mut *tx) 139 | .await?; 140 | 141 | query!( 142 | "INSERT INTO course_coordinator (course_id, email) VALUES ($1, $2)", 143 | document.info.id, 144 | coordinator.email 145 | ) 146 | .execute(&mut *tx) 147 | .await?; 148 | } 149 | 150 | tx.commit().await?; 151 | Ok(()) 152 | } 153 | 154 | /// Inserts the coordinator embedding into the database 155 | /// If the coordinator already exists, it does nothing, 156 | /// this is because we assume the names of the coordinators are immutable 157 | pub async fn insert_coordinator_embedding( 158 | &self, 159 | coordinator: CoordinatorEmbedding, 160 | ) -> Result<()> { 161 | query( 162 | "INSERT INTO name_embedding (email, embedding) VALUES ($1, $2) 163 | ON CONFLICT(email) DO NOTHING", 164 | ) 165 | .bind(coordinator.email) 166 | .bind(Vector::from(coordinator.name.to_owned())) 167 | .execute(&self.pool) 168 | .await?; 169 | Ok(()) 170 | } 171 | 172 | /// Inserts the course embedding into the database 173 | /// If the course already exists, it updates the embedding and the last_modified timestamp 174 | pub async fn insert_course_embedding(&self, course_embedding: CourseEmbedding) -> Result<()> { 175 | let mut tx = self.pool.begin().await?; 176 | query( 177 | "INSERT INTO title_embedding (course_id, embedding) VALUES ($1, $2) 178 | ON CONFLICT(course_id) DO UPDATE SET embedding = $2, last_modified = CURRENT_TIMESTAMP", 179 | ) 180 | 
.bind(&course_embedding.id) 181 | .bind(Vector::from(course_embedding.title.to_owned())) 182 | .execute(&mut *tx) 183 | .await?; 184 | 185 | query( 186 | "INSERT INTO content_embedding (course_id, embedding) VALUES ($1, $2) 187 | ON CONFLICT(course_id) DO UPDATE SET embedding = $2, last_modified = CURRENT_TIMESTAMP", 188 | ) 189 | .bind(course_embedding.id) 190 | .bind(Vector::from(course_embedding.content.to_owned())) 191 | .execute(&mut *tx) 192 | .await?; 193 | 194 | tx.commit().await?; 195 | Ok(()) 196 | } 197 | 198 | /// Returns the most relevant course ids based on the query embedding 199 | /// The title embedding is the title for that course 200 | /// The content embedding is the content for that course 201 | /// The coordinator embedding for each course is the most relevant coordinator for that course, 202 | /// if the coordinator's distance is greater than 0.8, it is clipped to 0.9, if it is less then it is halved 203 | /// to give it more importance in the total distance 204 | /// The relevance is then computed as the sum of the distances between the query embedding and the 205 | /// title embedding, content embedding, and coordinator embedding 206 | /// and is returned in ascending order (lower is better) 207 | pub async fn get_most_relevant_course_ids( 208 | &self, 209 | query_embedding: &[f32], 210 | ) -> Result> { 211 | let result = query(" 212 | WITH 213 | title_search AS ( 214 | SELECT 215 | course_id, embedding <-> $1 AS distance 216 | FROM 217 | title_embedding 218 | ), 219 | 220 | content_search AS ( 221 | SELECT 222 | course_id, embedding <-> $1 AS distance 223 | FROM 224 | content_embedding 225 | ), 226 | 227 | coordinator_search AS ( 228 | SELECT 229 | course_id, MIN( 230 | CASE 231 | WHEN embedding <-> $1 > 0.8 THEN 0.9 232 | ELSE (embedding <-> $1) / 2 233 | END) AS distance 234 | FROM 235 | course_coordinator 236 | INNER JOIN 237 | name_embedding 238 | ON 239 | course_coordinator.email = name_embedding.email 240 | GROUP BY course_id), 241 | 
242 | 243 | 244 | combined_search AS ( 245 | SELECT 246 | course.id, 247 | title_search.distance + content_search.distance + coordinator_search.distance AS total_distance 248 | FROM 249 | title_search 250 | INNER JOIN 251 | content_search ON title_search.course_id = content_search.course_id 252 | INNER JOIN 253 | coordinator_search ON title_search.course_id = coordinator_search.course_id 254 | INNER JOIN 255 | course ON title_search.course_id = course.id 256 | ), 257 | 258 | ranked_courses AS ( 259 | SELECT 260 | id, 261 | total_distance, 262 | ROW_NUMBER() OVER (PARTITION BY id ORDER BY total_distance) AS rn 263 | FROM 264 | combined_search 265 | ) 266 | 267 | SELECT 268 | id 269 | FROM 270 | ranked_courses 271 | WHERE 272 | rn = 1 273 | ORDER BY 274 | total_distance; 275 | ") 276 | .bind(Vector::from(query_embedding.to_owned())) 277 | .fetch_all(&self.pool) 278 | .await?; 279 | let mut ids: Vec = Vec::new(); 280 | for row in result { 281 | ids.push(row.try_get("id")?); 282 | } 283 | 284 | Ok(ids) 285 | } 286 | } 287 | -------------------------------------------------------------------------------- /frontend/src/routes/course/[courseId]/+page.svelte: -------------------------------------------------------------------------------- 1 | 118 | 119 | 120 | {title} 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | {#if loading} 145 | 146 | {:else} 147 | 151 | Back 152 | 153 |
154 |
155 |

161 | {course.title} 162 |

163 |

{course.id} - SCIENCE

164 |
165 | 166 |
167 |
168 | {#if course["statistics"] != null && statistics != null} 169 |

170 | Passed: {Math.round(statistics["pass-rate"] * 100)}%, 171 | Average grade: {Math.round(statistics["mean"] * 100) / 172 | 100}, Median grade: {statistics["median"]} 173 |

174 | 175 | {/if} 176 |
177 |
178 |

Description

179 | 180 | {@html content} 181 | {@html learning_outcome} 182 | {#if course["recommended-qualifications"] != null} 183 |

Recommended qualifications

184 | {@html course["recommended-qualifications"]} 185 | {/if} 186 |
187 |
188 | 189 | {#each course.coordinator as emp} 190 |
191 |

{emp.name}

192 |
193 |

{emp.email}

194 | {/each} 195 |
196 | 197 | {#each course.exam as exam} 198 |

199 | {separate_capitals_letters(exam.type)} 200 | {#if exam.duration} 201 | - ({formatExamDuration(exam.duration)}) 202 | {/if} 203 |

204 | {/each} 205 |
206 | 207 |

208 | Level: {denest_type_maps(course.degree).join("\n")} 209 |

210 |

ECTS: {course.ects}

211 | 212 |

213 | Block(s): {coerce_blocks_to_int( 214 | denest_type_maps(course.block) 215 | ) 216 | .sort() 217 | .join(", ")} 218 |

219 |

220 | Group(s): {denest_type_maps(course.schedule) 221 | .sort() 222 | .join(", ")} 223 |

224 | 225 |

226 | 227 | 231 | Go to official page 232 | 233 |
234 | 235 |
    236 | {#each course.department as dep} 237 |
  • 238 | {separate_capitals_letters(dep.name)} 239 |
  • 240 | {/each} 241 |
242 |
243 | 244 | 245 | 246 | {#each course.workload as wl} 247 | 248 | 251 | 252 | 253 | {/each} 254 | 255 |
249 | {separate_capitals_letters(wl.type)}{wl.hours}h
256 |

Total: {totalHours}h

257 |
258 |
259 |
260 | 261 |
262 |
263 | {/if} 264 | -------------------------------------------------------------------------------- /backend/db-manager/src/db_manager/db.clj: -------------------------------------------------------------------------------- 1 | (ns db-manager.db 2 | (:require [datascript.core :as d] 3 | [org.httpkit.client :as http] 4 | [clojure.data.json :as json] 5 | [clojure.walk :refer [postwalk]])) 6 | 7 | (def many-ref {:db/valueType :db.type/ref 8 | :db/cardinality :db.cardinality/many}) 9 | (def one-ref {:db/valueType :db.type/ref 10 | :db/cardinality :db.cardinality/one}) 11 | (def unique {:db/unique :db.unique/identity}) 12 | 13 | (defn component [schema] 14 | (assoc schema :db/isComponent true)) 15 | 16 | (def schema 17 | {:course/id unique 18 | :course/title {} 19 | :course/ects {} 20 | 21 | :course/block many-ref 22 | 23 | :course/schedule many-ref 24 | 25 | :course/language many-ref 26 | :course/duration {:db/cardinality :db.cardinality/one} 27 | :course/degree many-ref 28 | :course/capacity {:db/cardinality :db.cardinality/one} 29 | :course/department many-ref 30 | :course/faculty one-ref 31 | :course/coordinator many-ref 32 | 33 | :course/workload (component many-ref) 34 | 35 | :course/exam (component many-ref) 36 | :course/content {} 37 | :course/learning-outcome {} 38 | :course/recommended-qualifications {} 39 | :course/statistics (component one-ref) 40 | 41 | :schedule/type unique 42 | :block/type unique 43 | :faculty/name unique 44 | :department/name unique 45 | :degree/type unique 46 | :language/name unique 47 | :employee/email unique 48 | :employee/name {} 49 | :statistics/mean {:db/cardinality :db.cardinality/one} 50 | :statistics/median {:db/cardinality :db.cardinality/one} 51 | :statistics/pass-rate {:db/cardinality :db.cardinality/one}}) 52 | 53 | (defn convert-coordinator 54 | "Convert a coordinator map from rust parser to a datascript map" 55 | [coordinator] 56 | (let [name (get coordinator "name") 57 | email (get coordinator "email")] 58 | 
{:employee/name name
 :employee/email email}))

(defn convert-workload
  "Convert a workload map from the rust parser (string keys
  \"workload_type\" and \"hours\") to a datascript map."
  [workload]
  (let [type (get workload "workload_type")
        hours (get workload "hours")]
    {:workload/type type
     :workload/hours hours}))

(defn convert-exam [exam]
  ; this can either be a string or a map, if its a string then it has no duration
  (if (string? exam)
    {:exam/type exam}
    ; the key is the exam type, the value is the duration
    ; ensure that the map is exactly 1 element
    (if (= 1 (count exam))
      (let [[exam-type duration] (first exam)]
        {:exam/type exam-type
         :exam/duration duration})
      (throw (Exception. "Exam map has more than 1 element, this should be impossible")))))

(defn remove-nils
  "As hinted by the name, it traverses the entire map and removes all fields with nils
  This is necessary because the rust parser returns a lot of nils, and datascript does not like nils
  NOTE(review): `(when v ...)` is a truthiness test, so entries whose value is
  `false` are dropped along with nil ones — confirm no boolean field relies on this.
  Snippet from https://stackoverflow.com/questions/3937661/remove-nil-values-from-a-map"
  [m]
  (let [f (fn [[k v]] (when v [k v]))]
    (postwalk (fn [x] (if (map? x) (into {} (map f x)) x)) m)))

;; Recursively strip datascript's internal :db/id key from every map in `coll`
;; before query results are handed back to API consumers.
(defn remove-db-ids
  [coll]
  (postwalk (fn [x] (if (map?
x) (dissoc x :db/id) x)) coll))

(defn course-to-transaction
  "Build one datascript transaction map for a course from the raw,
  string-keyed JSON map emitted by the rust parser."
  [course-map]
  (let [id (get-in course-map ["info" "id"])
        title (get course-map "title")
        ects (get-in course-map ["info" "ects"])
        blocks (get-in course-map ["info" "block"])
        schedules (get-in course-map ["info" "schedule"])
        languages (get-in course-map ["info" "language"])
        duration (get-in course-map ["info" "duration"])
        degrees (get-in course-map ["info" "degree"])
        capacity (get-in course-map ["info" "capacity"])
        departments (get-in course-map ["logistics" "departments"])
        faculty (get-in course-map ["logistics" "faculty"])
        coordinators (map convert-coordinator (get-in course-map ["logistics" "coordinators"]))
        workloads (map convert-workload (get course-map "workloads"))
        exams (map convert-exam (get course-map "exams"))
        content (get-in course-map ["description" "content"])
        learning-outcome (get-in course-map ["description" "learning_outcome"])
        recommended-qualifications (get-in course-map ["description" "recommended_qualifications"])
        summary (get-in course-map ["description" "summary"])]
    ; FIXME: Why is this here
    (when (empty? departments)
      (println "Course " title " has no departments"))
    ;; Nested entity maps ({:block/type ...} etc.) are upserted by datascript
    ;; through the :db.unique/identity attributes declared in `schema`.
    {:course/id id
     :course/title title
     :course/ects ects
     :course/block (mapv #(hash-map :block/type %) blocks)
     :course/schedule (mapv #(hash-map :schedule/type %) schedules)
     :course/language (mapv #(hash-map :language/name %) languages)
     :course/duration duration
     :course/degree (mapv #(hash-map :degree/type %) degrees)
     :course/capacity capacity
     :course/department (mapv #(hash-map :department/name %) departments)
     :course/faculty (hash-map :faculty/name faculty)
     :course/coordinator coordinators
     :course/workload workloads
     :course/exam exams
     :course/content content
     :course/learning-outcome learning-outcome
     ; datascript cannot store nil, so coerce a missing value to ""
     :course/recommended-qualifications (if (nil? recommended-qualifications) "" recommended-qualifications)
     :course/summary summary}))

(defn courses-to-transactions
  "Map `course-to-transaction` over a collection of raw course maps."
  [courses]
  (map course-to-transaction courses))

(defn get-course-ids
  "Return a vector of every :course/id currently in the database."
  [conn]
  (let [course-ids (d/q '[:find ?id
                          :where
                          [?e :course/id ?id]]
                        @conn)]
    ; this is a vector of vectors, we want a vector of strings
    (mapv first course-ids)))

(defn get-course-by-id
  "Find all the detailed information about a course by its id"
  [conn course-id]
  (let [course (d/pull @conn '[* {:course/schedule [*]
                                  :course/exam [*]
                                  :course/degree [*]
                                  :course/block [*]
                                  :course/faculty [*]
                                  :course/department [*]
                                  :course/coordinator [*]
                                  :course/workload [*]
                                  :course/language [*]
                                  :course/statistics [*]}]
                       [:course/id course-id])]
    ; remove summary since we already bring it along from content
    (remove-db-ids (dissoc course :course/summary))))

; denest a vector of vectors
(defn denest [v]
  (mapv first v))

(defn search-vector-store
  "Query the vector-store search service and return the decoded JSON body
  (a ranked list of course ids); throws when the service does not answer 200."
  [query]
  ; send http request to localhost:4000/search
| (let [response @(http/get "http://vectorstore:4000/search" {:query-params {:query query}})] 166 | (if (= (:status response) 200) 167 | (let [body (:body response)] 168 | (json/read-str body)) 169 | (do 170 | (println response) 171 | (throw (Exception. "Search request failed, the vectorstore is down or unhealthy")))))) 172 | 173 | (defn query-course-ids [conn predicate-map] 174 | (let [blocks (get predicate-map :blocks) 175 | schedules (get predicate-map :schedules) 176 | exams (get predicate-map :exams) 177 | degrees (get predicate-map :degrees) 178 | departments (get predicate-map :departments) 179 | languages (get predicate-map :languages) 180 | search (get predicate-map :search) 181 | courses (denest (d/q (concat '[:find ?course-id :in $ 182 | :where 183 | [?e :course/block ?block] 184 | [?e :course/id ?course-id] 185 | [?e :course/schedule ?schedule] 186 | [?e :course/exam ?exam] 187 | [?e :course/degree ?degree] 188 | [?e :course/department ?department] 189 | [?e :course/language ?language]] 190 | (if (empty? blocks) 191 | [] 192 | (list (cons 'or (mapv (fn [block] (vector '?block ':block/type block)) blocks)))) 193 | 194 | (if (empty? schedules) 195 | [] 196 | (list (cons 'or (mapv (fn [schedule] (vector '?schedule ':schedule/type schedule)) schedules)))) 197 | 198 | (if (empty? exams) 199 | [] 200 | (list (cons 'or (mapv (fn [exam] (vector '?exam ':exam/type exam)) exams)))) 201 | 202 | (if (empty? degrees) 203 | [] 204 | (list (cons 'or (mapv (fn [degree] (vector '?degree ':degree/type degree)) degrees)))) 205 | 206 | (if (empty? departments) 207 | [] 208 | (list (cons 'or (mapv (fn [department] (vector '?department ':department/name department)) departments)))) 209 | (if (empty? languages) 210 | [] 211 | (list (cons 'or (mapv (fn [language] (vector '?language ':language/name language)) languages))))) 212 | @conn))] 213 | (if (empty? 
search) 214 | courses 215 | ; we get a list of IDs from the search vector store, we need to find all the courses in 216 | ; the returned courses which are in the vector store list whilst preserving the order 217 | (let [search-result (search-vector-store search)] 218 | (if (nil? search-result) 219 | courses 220 | ; perform an intersection of the two lists, but preserve the order of the first list 221 | (filter #(contains? (set courses) %) search-result)))))) 222 | 223 | (defn get-overviews-from-ids [conn ids] 224 | (d/pull-many @conn '[:course/id 225 | :course/title 226 | :course/ects 227 | :course/summary 228 | {:course/schedule [*] 229 | :course/block [*] 230 | :course/exam [*] 231 | :course/degree [*] 232 | :course/language [*] 233 | :course/statistics [:statistics/mean 234 | :statistics/median 235 | :statistics/pass-rate]}] 236 | (mapv #(vector :course/id %) ids))) 237 | 238 | (defn get-courses [conn predicate-map] 239 | (let [course-ids (query-course-ids conn predicate-map)] 240 | (map remove-db-ids (get-overviews-from-ids conn course-ids)))) 241 | -------------------------------------------------------------------------------- /backend/rust_parser/src/main.rs: -------------------------------------------------------------------------------- 1 | use std::env; 2 | 3 | use std::time; 4 | pub mod parser; 5 | 6 | //const DEFAULT_DATA_DIR: &str = "../../data"; 7 | //const HTMLS_DIR: &str = "../../data/pages"; 8 | //const TEST_DIR: &str = "./test_data"; 9 | //const TEST_HTMLS_DIR: &str = "./test_data/pages"; 10 | //const JSON_DIR: &str = "../../data/new_json"; 11 | 12 | // make a function that takes a path and returns the number of fails and the total number of courses 13 | fn count_fails(htmls_dir: &str, json_dir: &str) -> (usize, usize) { 14 | let mut fails = 0; 15 | let mut passes = 0; 16 | let dir = std::fs::read_dir(htmls_dir).unwrap(); 17 | for entry in dir { 18 | let entry = entry.unwrap(); 19 | // read the string from the file 20 | let html = 
std::fs::read_to_string(entry.path()).unwrap(); 21 | // parse the string 22 | let course = parser::parse_course(&html); 23 | // if the error cause (this is an anyhow context) contains , then we ignore it and continue 24 | match course { 25 | Ok(c) => { 26 | // emit json to file 27 | let json = serde_json::to_string(&c).unwrap(); 28 | let path = format!("{}/{}.json", json_dir, c.info.id); 29 | std::fs::write(path, json).unwrap(); 30 | passes += 1; 31 | } 32 | 33 | Err(e) => { 34 | // if any of the causes contain , then we ignore it and continue 35 | if e.chain().any(|c| c.to_string().contains("")) { 36 | continue; 37 | } else { 38 | fails += 1; 39 | println!("Error: {e:?}\n\n"); 40 | } 41 | } 42 | } 43 | } 44 | (fails, passes) 45 | } 46 | 47 | fn main() { 48 | let args: Vec = env::args().collect(); 49 | let timer = time::Instant::now(); 50 | let html_dir = &args[1]; 51 | let json_dir = &args[2]; 52 | 53 | // print all files in the html directory 54 | let _dir = std::fs::read_dir(html_dir).unwrap(); 55 | println!("fails and total: {:?}", count_fails(html_dir, json_dir)); 56 | 57 | println!("Time elapsed: {:?}", timer.elapsed()); 58 | } 59 | 60 | #[cfg(test)] 61 | mod tests { 62 | use super::*; 63 | use pretty_assertions::assert_eq; 64 | const TEST_HTMLS_DIR: &str = "./test_data/pages"; 65 | use parser::{ 66 | Coordinator, Course, Department, Description, Exam, Faculty, LogisticInformation, Workload, 67 | WorkloadType, 68 | }; 69 | 70 | // check that there are files in PAGES_DIR 71 | #[test] 72 | fn test_pages_dir() { 73 | let dir = std::fs::read_dir(TEST_HTMLS_DIR).unwrap(); 74 | assert!(dir.count() > 0); 75 | } 76 | 77 | #[test] 78 | fn test_lsls10061_u() { 79 | let html = std::fs::read_to_string(format!("{TEST_HTMLS_DIR}/LSLS10061U.html")).unwrap(); 80 | let course = parser::parse_course(&html); 81 | let expected_course = Course { 82 | title: "International Naturforvaltning".to_string(), 83 | info: parser::CourseInformation::new( 84 | "LSLS10061U".to_string(), 85 | 
7.5, 86 | vec![parser::Block::Two], 87 | vec![parser::Schedule::B], 88 | vec![parser::Language::Danish], 89 | parser::Duration::One, 90 | vec![parser::Degree::Bachelor], 91 | parser::Capacity(Some(70)), 92 | ), 93 | description: Description { 94 | content: "

Skov og naturressourcer globalt:

Globale klima- og vegetationszoner og deres økologi

Verdens naturressourcer, skove, nationalparker og\nnaturområder

Naturbeskyttelse og bevarelsesprogrammer

Globale skovopgørelser, fakta og trends, klassifikationer\u{a0}-\nog problematikker

Udfordringer i forhold til en bæredygtig anvendelse af\nnaturressourcer globalt

\u{a0}

International forvaltning:

International skov og naturressourceforvaltning - politisk,\ninstitutionelt og økonomisk

- Skovpolitik og regeringsførelse

- Internationale konventioner

- Regional EU politik og regelsæt\u{a0}

International skov og naturressourceforvaltning - decentrale\nsystemer\u{a0}

- Agroforestry

- Samfundsbaseret naturressourceforvaltning, indfødte folk og\nviden

Naturressourcebaserede konflikter

Klimaændringer ift. skov og natur

\u{a0}

Produktion, markedet og handel:

International skov og naturressourceforvaltning -\nkommercielt

- Plantage-\u{a0} koncessionsskovbrug

- Dyrkningssystemer og problematikker, optimering og\ngenbevarelse

- International handel med træprodukter \u{a0}

- Ulovlig hugst og handel

- Global og EU markedsbaseret regulering (skovcertificering,\nlegalitetsverificering, FLEGT)

".to_string(), 95 | learning_outcome: "

Kursets overordnede formål er at give den enkelte studerende\nviden om de vilkår, der danner rammerne for forvaltningen af skov\nog naturressourcer verden over. Det er også at give en\ngrundlæggende forståelse for hvordan de internationale rammer og\nvilkår har betydning for den måde, som skov og naturressourcer\nforvaltes i Danmark.



Viden:

- Kendskab til vækstvilkår og vegetationstyper globalt

- Kendskab til direkte og underliggende årsager til afskovning\nog over-udnyttelse af ressourcer

- Indsigt i koncessionsskovbrug, plantagedrift og\nagro-forestry

- Kendskab til internationale konventioner og EU lovgivning på\nskov og naturressourceområdet

- Kendsakbs til bevaringsklassifikationer og naturbeskyttelse i\npraksis

- Kendskab til de mest almindelige\u{a0}tømmertræarter

- Kendskab til det internationale markeds betydning for\nforvaltningen af skov- og naturressourcer

- Indsigt i mekanismerne der driver ulovlig hugst og handel med\ntræ

- Indsigt i markedsbaseret regulering

- Kendskab til de mest almindelige certificeringssystemer

- Indsigt i betydningen af\u{a0}klimaforandringer\u{a0}for den\nmåde skov og naturressourcer\u{a0} forvaltes\u{a0}i fremtiden

\u{a0}

Færdigheder:

- Redegøre for hvordan de naturgivne vækstvilkår i forskellige\nvegetationszoner har betydning for den måde som skov – og\nnaturressourcer i disse zoner bør forvaltes

- Redegøre for og diskutere hvordan problemer over-udnyttelse af\nskov og naturressourcer kan afhjælpes\n



\n- Redegøre for hvordan internationale konventioner og EU lovgivning\nhar betydning for forvaltningen af skov og naturressourcer\nlokalt

- Analysere globale markedstrends og forklare hvordan disse har\nbetydning for forvaltningen af skov- og naturressourcer

- Redegøre for de grundlæggende principper bag decentraliseret\nressourceforvaltning

- Identificere de udfordringer, som forvaltningen af\nfællesressourcer kan medføre

- Forklare hvordan markedsbaseret regulering af skov og\nnaturressourceforvaltning fungerer

- Redegøre for de vigtigste drivere bag ulovlig hugst og handel\nsamt hvordan det kan bekæmpes

- Opstille praktiske løsninger for hvordan negative konsekvenser\naf klimaforandringer kan afhjælpes

- Kan argumentere for og imod decentraliseret skov- og\nnaturressourceforvaltning



Kompetencer:

- Arbejde med skov og naturressourceforvaltning under de\nrammevilkår som internationale konventioner og EU lovgivning\ngiver

- Bidrage til at afhjælpe degradering og over-udnyttelse af skov\nog naturressourcer

- Indgå i samarbejder omkring forvaltningen af skov og\nnaturressource i forskellige klimazoner - både nationalt og\ninternationalt

- Tage informerede driftsbeslutninger under hensyntagen til\nnationale og internationale markedstrends

- Arbejde med certificering og legalitetsveriticering

- Integrere klimatilpasningshensyn i forvaltningen af skov og\nnaturressourcer

".to_string(), 96 | recommended_qualifications: Some("Basal forståelse for\nnaturforvaltning og/eller samfundsvidenskab.\n

\nSprogkundskaber til at kunne læse og forstå engelsk faglitteratur.\n

\nTilnærmelsesvis alt litteratur er på engelsk.".to_string()), 97 | summary: "Skov og naturressourcer globalt:Globale klima- og vegetationszoner og deres økologiVerdens naturressourcer, skove, nationalparker og\nnaturområderNaturbeskyttelse og bevarelsesprogrammerGlobale skovopgørelser, fakta og trends, klassifikationer\u{a0}-\nog problematikkerUdfordringer i forhold til en bæredygt".to_string(), 98 | }, 99 | exams: vec![Exam::Oral(Some(30))], 100 | /* 101 | * > workloads: [ 102 | > Workload { 103 | > workload_type: Lectures, 104 | > hours: 98.0, 105 | > }, 106 | > Workload { 107 | > workload_type: Preparation, 108 | > hours: 97.0, 109 | > }, 110 | > Workload { 111 | > workload_type: ProjectWork, 112 | > hours: 8.0, 113 | > }, 114 | > Workload { 115 | > workload_type: Exam, 116 | > hours: 1.0, 117 | > }, 118 | > ], 119 | */ 120 | workloads: vec![ 121 | Workload::new(WorkloadType::Lectures, 98.0), 122 | Workload::new(WorkloadType::Preparation, 97.0), 123 | Workload::new(WorkloadType::ProjectWork, 8.0), 124 | Workload::new(WorkloadType::Exam, 1.0), 125 | ], 126 | 127 | logistics: LogisticInformation::new( 128 | vec![Department::GeosciencesAndNaturalResourceManagement], 129 | Faculty::Science, 130 | vec![Coordinator::new( 131 | "Kirsten Carlsen".into(), 132 | "bdk748@alumni.ku.dk".into(), 133 | )], 134 | ), 135 | }; 136 | assert_eq!(expected_course, course.unwrap()); 137 | } 138 | 139 | // We need to ignore the duration if the course is known to be a summer course. 
140 | #[ignore] 141 | #[test] 142 | fn test_nbik15000_u() { 143 | let html = std::fs::read_to_string(format!("{TEST_HTMLS_DIR}/NBIK15000U.html")).unwrap(); 144 | let course = parser::parse_course(&html); 145 | let expected_course = Course { 146 | title: "BAdvanced Plant Identification".to_string(), 147 | info: parser::CourseInformation::new( 148 | "NBIK15000U".to_string(), 149 | 7.5, 150 | vec![parser::Block::Summer], 151 | vec![parser::Schedule::B], // doesnt exist 152 | vec![parser::Language::English], 153 | parser::Duration::One, 154 | vec![parser::Degree::Master], 155 | parser::Capacity(Some(16)), 156 | ), 157 | description: Description { 158 | content: "".to_string(), 159 | learning_outcome: "".to_string(), 160 | recommended_qualifications: Some("".to_string()), 161 | summary: "".to_string(), 162 | }, 163 | exams: vec![Exam::Oral(Some(30))], 164 | workloads: Vec::new(), 165 | logistics: LogisticInformation::new(Vec::new(), Faculty::Science, Vec::new()), 166 | }; 167 | assert_eq!(expected_course, course.unwrap()); 168 | } 169 | } 170 | -------------------------------------------------------------------------------- /backend/rust_parser/src/parser.rs: -------------------------------------------------------------------------------- 1 | use anyhow::{bail, ensure, Context, Result}; 2 | use serde::Serialize; 3 | use tl::VDom; 4 | 5 | use crate::parser::course_information::parse_course_info; 6 | pub mod course_information; 7 | 8 | use crate::parser::exam_information::parse_course_exams; 9 | pub mod exam_information; 10 | 11 | use crate::parser::logistic_information::parse_logistic_info; 12 | pub mod logistic_information; 13 | 14 | use crate::parser::workload_information::parse_workloads; 15 | pub mod workload_information; 16 | 17 | use crate::parser::content_serialiser::grab_htmls; 18 | pub mod content_serialiser; 19 | 20 | #[derive(Debug, PartialEq, Serialize)] 21 | pub struct Course { 22 | pub title: String, 23 | pub info: CourseInformation, 24 | pub logistics: 
LogisticInformation, 25 | pub workloads: Vec, 26 | pub exams: Vec, 27 | pub description: Description, 28 | } 29 | 30 | #[derive(Debug)] 31 | pub enum CourseLanguage { 32 | English, 33 | Danish, 34 | } 35 | 36 | #[derive(Debug, PartialEq, Serialize)] 37 | pub struct CourseInformation { 38 | pub id: String, 39 | ects: f32, 40 | block: Vec, 41 | schedule: Vec, 42 | language: Vec, 43 | duration: Duration, 44 | degree: Vec, 45 | capacity: Capacity, 46 | } 47 | 48 | #[derive(Debug, PartialEq, Serialize)] 49 | pub enum Department { 50 | // Faculty of Science 51 | PlantAndEnvironmentalScience, 52 | Biology, 53 | ComputerScience, 54 | FoodAndResourceEconomics, 55 | FoodScience, 56 | GeosciencesAndNaturalResourceManagement, 57 | NutritionExerciseAndSports, 58 | Mathematics, 59 | ScienceEducation, 60 | // PlantAndEnvironmentalSciences, this never occurs as of end of 2023 61 | Chemistry, 62 | NielsBohrInstitute, 63 | NaturalHistoryMuseumOfDenmark, 64 | VeterinaryAndAnimalSciences, 65 | BiomedicalSciences, 66 | PublicHealth, 67 | DrugDesignAndPharmacology, 68 | CellularAndMolecularMedicine, 69 | Pharmacy, 70 | Globe, 71 | } 72 | impl Department { 73 | fn from_str(s: &str) -> Result { 74 | match s.replace('\n', " ").as_str() { 75 | "Department of Computer Science" | "Datalogisk Institut" => { 76 | Ok(Department::ComputerScience) 77 | } 78 | "Institut for Idræt og Ernæring" | "Department of Nutrition, Exercise and Sports" => { 79 | Ok(Department::NutritionExerciseAndSports) 80 | } 81 | "Statens Naturhistoriske Museum" | "The Natural History Museum of Denmark" => { 82 | Ok(Department::NaturalHistoryMuseumOfDenmark) 83 | } 84 | "Institut for Plante- og Miljøvidenskab" 85 | | "Department of Plant and Environmental Sciences" => { 86 | Ok(Department::PlantAndEnvironmentalScience) 87 | } 88 | "Institut for Matematiske Fag" | "Department of Mathematical Sciences" => { 89 | Ok(Department::Mathematics) 90 | } 91 | "Niels Bohr Institutet" | "The Niels Bohr Institute" => { 92 | 
Ok(Department::NielsBohrInstitute) 93 | } 94 | "Institut for Geovidenskab og Naturforvaltning" 95 | | "Department of Geoscience and Natural Resource Management" => { 96 | Ok(Department::GeosciencesAndNaturalResourceManagement) 97 | } 98 | "Institut for Naturfagenes Didaktik" | "Department of Science Education" => { 99 | Ok(Department::ScienceEducation) 100 | } 101 | "Institut for Fødevare- og Ressourceøkonomi" 102 | | "Department of Food and Resource Economics" => { 103 | Ok(Department::FoodAndResourceEconomics) 104 | } 105 | "Institut for Fødevarevidenskab" | "Department of Food Science" => { 106 | Ok(Department::FoodScience) 107 | } 108 | "Kemisk Institut" | "Department of Chemistry" => Ok(Department::Chemistry), 109 | "Biologisk Institut" | "Department of Biology" => Ok(Department::Biology), 110 | "Department of Veterinary and Animal Sciences" 111 | | "Institut for Veterinær- og Husdyrvidenskab (IVH)" => { 112 | Ok(Department::VeterinaryAndAnimalSciences) 113 | } 114 | "Department of Biomedical Sciences" => Ok(Department::BiomedicalSciences), 115 | "Department of Pharmacy" => Ok(Department::Pharmacy), 116 | "Institut for Lægemiddeldesign og Farmakologi" 117 | | "Department of Drug Design and Pharmacology" => { 118 | Ok(Department::DrugDesignAndPharmacology) 119 | } 120 | "Department of Cellular and Molecular Medicine" => { 121 | Ok(Department::CellularAndMolecularMedicine) 122 | } 123 | "Department of Public Health" => Ok(Department::PublicHealth), 124 | "Institut for Nordiske Studier og Sprogvidenskab" => { 125 | bail!("Nordic studies not supported ") 126 | } 127 | "GLOBE Institute" => Ok(Department::Globe), 128 | _ => bail!("Unknown department: {}", s), 129 | } 130 | } 131 | } 132 | 133 | #[derive(Debug, PartialEq, Serialize)] 134 | pub enum Faculty { 135 | Science, 136 | } 137 | 138 | #[derive(Debug, PartialEq, Serialize)] 139 | pub struct Coordinator { 140 | name: String, 141 | email: String, 142 | } 143 | impl Coordinator { 144 | pub fn new(name: String, 
email: String) -> Self { 145 | Self { name, email } 146 | } 147 | } 148 | 149 | #[derive(Debug, PartialEq, Serialize)] 150 | pub struct LogisticInformation { 151 | departments: Vec, 152 | faculty: Faculty, 153 | coordinators: Vec, 154 | } 155 | impl LogisticInformation { 156 | pub fn new( 157 | departments: Vec, 158 | faculty: Faculty, 159 | coordinators: Vec, 160 | ) -> Self { 161 | Self { 162 | departments, 163 | faculty, 164 | coordinators, 165 | } 166 | } 167 | } 168 | 169 | impl CourseInformation { 170 | #[allow(clippy::too_many_arguments)] 171 | pub fn new( 172 | id: String, 173 | ects: f32, 174 | block: Vec, 175 | schedule: Vec, 176 | language: Vec, 177 | duration: Duration, 178 | degree: Vec, 179 | capacity: Capacity, 180 | ) -> Self { 181 | Self { 182 | id, 183 | ects, 184 | block, 185 | schedule, 186 | language, 187 | duration, 188 | degree, 189 | capacity, 190 | } 191 | } 192 | } 193 | 194 | #[derive(Debug, PartialEq, Serialize)] 195 | pub enum Block { 196 | One, 197 | Two, 198 | Three, 199 | Four, 200 | Summer, 201 | Other(String), 202 | } 203 | 204 | #[derive(Debug, PartialEq, Serialize)] 205 | pub enum Schedule { 206 | A, 207 | B, 208 | C, 209 | D, 210 | OutsideOfSchedule, 211 | Other(String), 212 | } 213 | 214 | #[derive(Debug, PartialEq, Serialize)] 215 | pub enum Language { 216 | Danish, 217 | English, 218 | } 219 | 220 | #[derive(Debug, Eq, PartialEq, Serialize)] 221 | pub enum Duration { 222 | One, 223 | Two, 224 | DependsOnEcts, 225 | Custom(String), 226 | } 227 | 228 | #[derive(Debug, PartialEq, Eq, Ord, PartialOrd, Serialize)] 229 | pub enum Degree { 230 | Phd, 231 | Bachelor, 232 | Master, 233 | Propædeutik, 234 | } 235 | 236 | #[derive(Debug, PartialEq, Serialize)] 237 | pub struct Capacity(pub Option); 238 | 239 | #[derive(Debug, PartialEq, Serialize)] 240 | pub enum Exam { 241 | Oral(Option), 242 | Written(Option), 243 | Assignment(Option), 244 | ContinuousAssessment, 245 | Other, 246 | } 247 | 248 | #[derive(Debug, PartialEq, Serialize)] 
249 | pub enum WorkloadType { 250 | Exam, 251 | ELearning, 252 | Laboratory, 253 | StudyGroup, 254 | TheoryExercises, 255 | FieldWork, 256 | Preparation, 257 | ExamPreparation, 258 | Excursions, 259 | Lectures, 260 | PracticalExercises, 261 | ProjectWork, 262 | Exercises, 263 | Guidance, 264 | ClassInstruction, 265 | PracticalTraining, 266 | Seminar, 267 | } 268 | impl WorkloadType { 269 | fn from_str(s: &str) -> Result { 270 | match s { 271 | "Forelæsninger" | "Lectures" => Ok(WorkloadType::Lectures), 272 | "Forberedelse (anslået)" | "Preparation" => Ok(WorkloadType::Preparation), 273 | "Theory exercises" | "Teoretiske øvelser" => Ok(WorkloadType::TheoryExercises), 274 | "Project work" | "Projektarbejde" => Ok(WorkloadType::ProjectWork), 275 | "Øvelser" | "Exercises" => Ok(WorkloadType::Exercises), 276 | "Eksamen" | "Exam" => Ok(WorkloadType::Exam), 277 | "Eksamensforberedelse" | "Exam Preparation" => Ok(WorkloadType::ExamPreparation), 278 | "Holdundervisning" | "Class Instruction" => Ok(WorkloadType::ClassInstruction), 279 | "Practical exercises" | "Praktiske øvelser" => Ok(WorkloadType::PracticalExercises), 280 | "E-Learning" | "E-læring" => Ok(WorkloadType::ELearning), 281 | "Feltarbejde" | "Field Work" => Ok(WorkloadType::FieldWork), 282 | "Studiegrupper" | "Study Groups" => Ok(WorkloadType::StudyGroup), 283 | "Seminar" => Ok(WorkloadType::Seminar), 284 | "Ekskursioner" | "Excursions" => Ok(WorkloadType::Excursions), 285 | "Laboratorie" | "Laboratory" => Ok(WorkloadType::Laboratory), 286 | "Praktik" | "Practical Training" => Ok(WorkloadType::PracticalTraining), 287 | 288 | "Guidance" | "Vejledning" => Ok(WorkloadType::Guidance), 289 | _ => bail!("Unknown workload type: {}", s), 290 | } 291 | } 292 | } 293 | 294 | #[derive(Debug, PartialEq, Serialize)] 295 | pub struct Workload { 296 | workload_type: WorkloadType, 297 | hours: f32, 298 | } 299 | impl Workload { 300 | pub fn new(workload_type: WorkloadType, hours: f32) -> Self { 301 | Self { 302 | workload_type, 
303 | hours, 304 | } 305 | } 306 | } 307 | 308 | #[derive(Debug, PartialEq, Serialize)] 309 | pub struct Description { 310 | pub content: String, 311 | pub learning_outcome: String, 312 | pub recommended_qualifications: Option, 313 | pub summary: String, 314 | } 315 | 316 | pub fn parse_course(html: &str) -> Result { 317 | let dom = tl::parse(html, tl::ParserOptions::default())?; 318 | let content = dom.get_element_by_id("content"); 319 | let title = parse_title(&dom)?; 320 | 321 | ensure!( 322 | content.is_some(), 323 | "Unable to find content element, this should not happen" 324 | ); 325 | let info = parse_course_info(&dom).context(format!("Unable to parse course: {title}"))?; 326 | let logistic_info = parse_logistic_info(&dom).context(format!( 327 | "Unable to parse logistic information for course: {title}" 328 | ))?; 329 | 330 | let exam_info = parse_course_exams(&dom).context(format!( 331 | "Unable to parse exam information for course: {title}" 332 | ))?; 333 | 334 | let workload_info = parse_workloads(&dom).context(format!( 335 | "Unable to parse workload information for course: {title}" 336 | ))?; 337 | 338 | let html_info = grab_htmls(&dom).context(format!( 339 | "Unable to grab html information for course: {title}" 340 | ))?; 341 | 342 | Ok(Course { 343 | title, 344 | info, 345 | logistics: logistic_info, 346 | exams: exam_info, 347 | workloads: workload_info, 348 | description: html_info, 349 | }) 350 | } 351 | fn parse_title(dom: &VDom) -> Result { 352 | let title = dom 353 | .get_elements_by_class_name("courseTitle") 354 | .next() 355 | .context("Unable to find course title") 356 | .and_then(|elem| { 357 | elem.get(dom.parser()) 358 | .context("Unable to grab parser for the dom, this should not happen") 359 | .map(|tag| tag.inner_text(dom.parser())) 360 | }); 361 | 362 | let binding = title 363 | .unwrap_or_else(|_| "Error unwrapping html title".into()) 364 | .replace(['\u{a0}', '\n'], " "); 365 | 366 | // Then split them 367 | let res: Vec<&str> = 
binding.split_whitespace().collect(); 368 | 369 | // Return only the part of the title without the course code 370 | ensure!( 371 | res.len() >= 2, 372 | "Title does not conform to expected structure: " 373 | ); 374 | 375 | Ok(res[1..].join(" ")) 376 | } 377 | --------------------------------------------------------------------------------