├── .gitignore ├── Cargo.toml ├── README.md ├── .travis.yml ├── LICENSE ├── src │   ├── http.rs │   ├── iter.rs │   └── lib.rs └── tests     └── integration.rs /.gitignore: -------------------------------------------------------------------------------- 1 | target 2 | Cargo.lock 3 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "wikipedia" 3 | version = "0.5.0" 4 | authors = ["Sebastian Waisbrot "] 5 | license-file = "LICENSE" 6 | description = "Access wikipedia articles from Rust" 7 | readme = "README.md" 8 | edition = "2021" 9 | documentation = "https://seppo0010.github.io/wikipedia-rs/" 10 | 11 | repository = "https://github.com/seppo0010/wikipedia-rs/" 12 | keywords = ["wikipedia", "api"] 13 | 14 | [features] 15 | default = ["http-client"] 16 | http-client = ["reqwest", "url"] 17 | 18 | [dependencies] 19 | serde_json = "1.0.138" 20 | reqwest = { version = "0.12.2", optional = true, features = ["blocking"] } 21 | url = { version = "2.5.4", optional = true } 22 | thiserror = "2.0.11" 23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # wikipedia-rs 2 | 3 | [![Build Status](https://travis-ci.org/seppo0010/wikipedia-rs.svg?branch=master)](https://travis-ci.org/seppo0010/wikipedia-rs) 4 | [![crates.io](http://meritbadge.herokuapp.com/wikipedia)](https://crates.io/crates/wikipedia) 5 | 6 | 7 | Access Wikipedia articles from Rust. 8 | 9 | The crate is called `wikipedia` and you can depend on it via cargo: 10 | 11 | ```toml 12 | [dependencies] 13 | wikipedia = "0.5.0" 14 | ``` 15 | 16 | 17 | # Examples 18 | 19 | ```rust 20 | extern crate wikipedia; 21 | 22 | let wiki = wikipedia::Wikipedia::<wikipedia::http::default::Client>::default(); 23 | let page = wiki.page_from_title("Club Atletico River Plate".to_owned()); 24 | let content = page.get_content().unwrap(); 25 | assert!(content.contains("B Nacional")); 26 | ``` 27 | 28 | # Documentation 29 | 30 | https://seppo0010.github.io/wikipedia-rs/ -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: rust 2 | sudo: false 3 | 4 | before_script: 5 | - pip install 'travis-cargo<0.2' --user && export PATH=$HOME/.local/bin:$PATH 6 | 7 | script: 8 | - cargo test 9 | - if [ "$TRAVIS_RUST_VERSION" = "nightly" ]; then cargo doc; fi 10 | - rustdoc --test README.md -L target/debug -L target/debug/deps 11 | 12 | rust: 13 | - stable 14 | - beta 15 | - nightly 16 | 17 | notifications: 18 | email: false 19 | 20 | after_success: 21 | - travis-cargo --only nightly doc-upload 22 | 23 | env: 24 | global: 25 | - secure: s3MV1MOyPg1O22Ssyk39gOhVRpxiJjo8sbgFvhE9U/8owbvJ29dQn1iQPcWvspNiBOU1u8IhMPtECr0YRCheqBrGov1R6Wa3S96+6ZsW7rufBGkj7q0YnZ3ho/N0ONmrlCzr+tjKOv3X3/xMqUDiemgry3Id5ZVz3Tq8ZT57kIHUdvpkiUmoxYMkq2N37/qB9O9+xYE40u7CsxTMCOlArFUT3Itt8C7XXcJETFs8VX0w5vOC0Gblht8lb3T5NqJxOIzeYGAa3wyjmlJAf9eWW22u4MDgGO/YwS7p1ronEkXsJDl3AgP2sqVXmKqy8L0q714Q6HrMHQ31aHFA190AYiZQlC2vE+h9YXYm8iIUirBeEnj0Xur2FBN0kNPUcffGHcAn5IA8w1ItPLoldDg1TkEuYRuEGXfWwAGdzNAc8Ss/c8puOd4Yb/gfm/bg3ZiZnsX36tlLqHHCRddPIW2/nZU5i8TiIpornxZCvMG7GpMREQt9C4vXjhkXuZqYHP5Dk/PMA8fSuNiFhOipX1JGwsUd4E/qXmQipeq6gormuGNrogHZtO52Lx93wLxR/1Vzhf+mY0S9K0e5hI6ewlwpoieR7XzqBNl5IHMSV1ybyc31+bJWiP8K5flXyelr7FDa0yRu15qaxHkmJyyEXtg2tFN5gN0jir4XAzwTmdvReDo= 26 | 
-------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2015, Sebastian Waisbrot 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | * Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 9 | 10 | * Redistributions in binary form must reproduce the above copyright notice, 11 | this list of conditions and the following disclaimer in the documentation 12 | and/or other materials provided with the distribution. 13 | 14 | * Neither the name of wikipedia-rs nor the names of its 15 | contributors may be used to endorse or promote products derived from 16 | this software without specific prior written permission. 17 | 18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | 29 | -------------------------------------------------------------------------------- /src/http.rs: -------------------------------------------------------------------------------- 1 | pub use crate::Error; 2 | 3 | pub trait HttpClient { 4 | /// Set the user agent. Default user agent is empty string. 5 | fn user_agent(&mut self, user_agent: String); 6 | 7 | /// Set a Wikimedia Personal API authentication token. 8 | fn bearer_token(&mut self, bearer_token: String); 9 | 10 | /// Run an http request with the given url and args, returning 11 | /// the result as a string. 
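/// Implementations are expected to URL-encode `args` into the query string of `base_url` and return the raw response body.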
12 | fn get<'a, I>(&self, base_url: &str, args: I) -> Result<String, Error> 13 | where 14 | I: Iterator<Item = (&'a str, &'a str)>; 15 | } 16 | 17 | #[cfg(feature = "http-client")] 18 | pub mod default { 19 | use reqwest; 20 | use std::io::Read; 21 | 22 | use super::{Error, HttpClient}; 23 | 24 | pub struct Client { 25 | user_agent: String, 26 | bearer_token: Option<String>, 27 | } 28 | 29 | impl Default for Client { 30 | fn default() -> Self { 31 | Client { 32 | user_agent: "wikipedia (https://github.com/seppo0010/wikipedia-rs)".to_owned(), 33 | bearer_token: None, 34 | } 35 | } 36 | } 37 | 38 | impl From<reqwest::Error> for Error { 39 | fn from(e: reqwest::Error) -> Error { 40 | Error::HTTPError(Box::new(e)) 41 | } 42 | } 43 | 44 | impl HttpClient for Client { 45 | fn user_agent(&mut self, user_agent: String) { 46 | self.user_agent = user_agent; 47 | } 48 | 49 | fn bearer_token(&mut self, bearer_token: String) { 50 | self.bearer_token = Some(bearer_token); 51 | } 52 | 53 | fn get<'a, I>(&self, base_url: &str, args: I) -> Result<String, Error> 54 | where 55 | I: Iterator<Item = (&'a str, &'a str)>, 56 | { 57 | let url = 58 | reqwest::Url::parse_with_params(base_url, args).map_err(|_| Error::URLError)?; 59 | let mut request = reqwest::blocking::Client::new() 60 | .get(url) 61 | .header(reqwest::header::USER_AGENT, self.user_agent.clone()); 62 | if let Some(ref bearer_token) = self.bearer_token { 63 | request = request.header( 64 | reqwest::header::AUTHORIZATION, 65 | format!("Bearer {}", bearer_token), 66 | ); 67 | } 68 | let mut response = request.send()?.error_for_status()?; 69 | 70 | let mut response_str = String::new(); 71 | response.read_to_string(&mut response_str)?; 72 | Ok(response_str) 73 | } 74 | } 75 | } 76 | -------------------------------------------------------------------------------- /src/iter.rs: -------------------------------------------------------------------------------- 1 | use std::marker::PhantomData; 2 | use std::vec::IntoIter; 3 | 4 | use serde_json::Value; 5 | 6 | use super::{http, Page, Result}; 7 | 8 | pub type IterElems = Vec<(String, String)>; 9 | 10 | pub struct Iter<'a, A: 'a + http::HttpClient, B: IterItem> { 11 | page: &'a Page<'a, A>, 12 | inner: IntoIter<Value>, 13 | cont: Option<Vec<(String, String)>>, 14 | phantom: PhantomData<B>, 15 | } 16 | 17 | impl<'a, A: http::HttpClient, B: IterItem> Iter<'a, A, B> { 18 | pub fn new(page: &'a Page<A>) -> Result<Iter<'a, A, B>> { 19 | let (array, cont) = B::request_next(page, &None)?; 20 | Ok(Iter { 21 | page, 22 | inner: array.into_iter(), 23 | cont, 24 | phantom: PhantomData, 25 | }) 26 | } 27 | 28 | fn fetch_next(&mut self) -> Result<()> { 29 | if self.cont.is_some() { 30 | let (array, cont) = B::request_next(self.page, &self.cont)?; 31 | self.inner = array.into_iter(); 32 | self.cont = cont; 33 | } 34 | Ok(()) 35 | } 36 | } 37 | 38 | impl<A: http::HttpClient, B: IterItem> Iterator for Iter<'_, A, B> { 39 | type Item = B; 40 | fn next(&mut self) -> Option<B> { 41 | match self.inner.next() { 42 | Some(ref v) => B::from_value(v), 43 | None => match self.cont { 44 | Some(_) => match self.fetch_next() { 45 | Ok(_) => self.inner.next().and_then(|x| B::from_value(&x)), 46 | Err(_) => None, 47 | }, 48 | None => None, 49 | }, 50 | } 51 | } 52 | } 53 | 54 | pub trait IterItem: Sized { 55 | fn request_next<A: http::HttpClient>( 56 | page: &Page<A>, 57 | cont: &Option<IterElems>, 58 | ) -> Result<(Vec<Value>, Option<IterElems>)>; 59 | fn from_value(value: &Value) -> Option<Self>; 60 | } 61 | 62 | #[derive(Debug, PartialEq)] 63 | pub struct Image { 64 | pub url: String, 65 | pub title: String, 66 | pub description_url: String, 67 | } 68 | 69 | impl IterItem for Image { 70 | fn request_next<A: http::HttpClient>( 71 | page: &Page<A>, 72 | cont: &Option<IterElems>, 73 | ) -> Result<(Vec<Value>, Option<IterElems>)> { 74 | 
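// Delegates to the page, which issues the generator=images API query and forwards any continuation token from the previous batch.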
page.request_images(cont) 75 | } 76 | 77 | fn from_value(value: &Value) -> Option<Self> { 78 | let obj = value.as_object()?; 79 | 80 | let title = obj 81 | .get("title") 82 | .and_then(|x| x.as_str()) 83 | .unwrap_or("") 84 | .to_owned(); 85 | let url = obj 86 | .get("imageinfo") 87 | .and_then(|x| x.as_array()) 88 | .and_then(|x| x.iter().next()) 89 | .and_then(|x| x.as_object()) 90 | .and_then(|x| x.get("url")) 91 | .and_then(|x| x.as_str()) 92 | .unwrap_or("") 93 | .to_owned(); 94 | let description_url = obj 95 | .get("imageinfo") 96 | .and_then(|x| x.as_array()) 97 | .and_then(|x| x.iter().next()) 98 | .and_then(|x| x.as_object()) 99 | .and_then(|x| x.get("descriptionurl")) 100 | .and_then(|x| x.as_str()) 101 | .unwrap_or("") 102 | .to_owned(); 103 | 104 | Some(Image { 105 | url: url.to_owned(), 106 | title: title.to_owned(), 107 | description_url: description_url.to_owned(), 108 | }) 109 | } 110 | } 111 | 112 | #[derive(Debug, PartialEq)] 113 | pub struct Reference { 114 | pub url: String, 115 | } 116 | 117 | impl IterItem for Reference { 118 | fn request_next<A: http::HttpClient>( 119 | page: &Page<A>, 120 | cont: &Option<IterElems>, 121 | ) -> Result<(Vec<Value>, Option<IterElems>)> { 122 | page.request_extlinks(cont) 123 | } 124 | 125 | fn from_value(value: &Value) -> Option<Self> { 126 | value 127 | .as_object() 128 | .and_then(|x| x.get("*")) 129 | .and_then(|x| x.as_str()) 130 | .map(|s| Reference { 131 | url: if s.starts_with("//") { 132 | format!("http:{}", s) 133 | } else { 134 | s.to_owned() 135 | }, 136 | }) 137 | } 138 | } 139 | 140 | #[derive(Debug, PartialEq)] 141 | pub struct Link { 142 | pub title: String, 143 | } 144 | 145 | impl IterItem for Link { 146 | fn request_next<A: http::HttpClient>( 147 | page: &Page<A>, 148 | cont: &Option<Vec<(String, String)>>, 149 | ) -> Result<(Vec<Value>, Option<Vec<(String, String)>>)> { 150 | page.request_links(cont) 151 | } 152 | 153 | fn from_value(value: &Value) -> Option<Self> { 154 | value 155 | .as_object() 156 | .and_then(|x| x.get("title")) 157 | .and_then(|x| x.as_str()) 158 | .map(|s| Link { 159 | title: s.to_owned(), 160 | }) 161 | } 162 | } 163 | 164 | #[derive(Debug, PartialEq)] 165 | pub struct LangLink { 166 | /// The language ID 167 | pub lang: String, 168 | 169 | /// The page title in this language, may be `None` if undefined 170 | pub title: Option<String>, 171 | } 172 | 173 | impl IterItem for LangLink { 174 | fn request_next<A: http::HttpClient>( 175 | page: &Page<A>, 176 | cont: &Option<Vec<(String, String)>>, 177 | ) -> Result<(Vec<Value>, Option<Vec<(String, String)>>)> { 178 | page.request_langlinks(cont) 179 | } 180 | 181 | fn from_value(value: &Value) -> Option<Self> { 182 | value.as_object().map(|l| LangLink { 183 | lang: l.get("lang").unwrap().as_str().unwrap().into(), 184 | title: l.get("*").and_then(|n| n.as_str()).map(|n| n.into()), 185 | }) 186 | } 187 | } 188 | 189 | #[derive(Debug, PartialEq)] 190 | pub struct Category { 191 | pub title: String, 192 | } 193 | 194 | impl IterItem for Category { 195 | fn request_next<A: http::HttpClient>( 196 | page: &Page<A>, 197 | cont: &Option<Vec<(String, String)>>, 198 | ) -> Result<(Vec<Value>, Option<Vec<(String, String)>>)> { 199 | page.request_categories(cont) 200 | } 201 | 202 | fn from_value(value: &Value) -> Option<Self> { 203 | value 204 | .as_object() 205 | .and_then(|x| x.get("title")) 206 | .and_then(|x| x.as_str()) 207 | .map(|s| Category { 208 | title: if let Some(st) = s.strip_prefix("Category: ") { 209 | st.to_owned() 210 | } else { 211 | s.to_owned() 212 | }, 213 | }) 214 | } 215 | } 216 | -------------------------------------------------------------------------------- /tests/integration.rs: -------------------------------------------------------------------------------- 1 | extern crate wikipedia; 2 | 3 | #[cfg(feature = "http-client")] 4 | mod tests { 5 
| use std::collections::HashSet; 6 | use wikipedia::http; 7 | use wikipedia::Wikipedia; 8 | 9 | fn w() -> Wikipedia<http::default::Client> { 10 | Wikipedia::default() 11 | } 12 | 13 | #[test] 14 | fn search() { 15 | let wikipedia = w(); 16 | let results = wikipedia.search("hello world").unwrap(); 17 | assert!(!results.is_empty()); 18 | assert!(results.contains(&"\"Hello, World!\" program".to_owned())); 19 | } 20 | 21 | #[test] 22 | fn geosearch() { 23 | let wikipedia = w(); 24 | let results = wikipedia.geosearch(-34.607307, -58.445566, 1000).unwrap(); 25 | assert!(!results.is_empty()); 26 | eprintln!("{:?}", results); 27 | assert!(results.contains(&"Villa Crespo".to_owned())); 28 | } 29 | 30 | #[test] 31 | fn random() { 32 | let wikipedia = w(); 33 | wikipedia.random().unwrap().unwrap(); 34 | } 35 | 36 | #[test] 37 | fn random_count() { 38 | let wikipedia = w(); 39 | assert_eq!(wikipedia.random_count(3).unwrap().len(), 3); 40 | } 41 | 42 | #[test] 43 | fn page_content() { 44 | let wikipedia = w(); 45 | let page = wikipedia.page_from_title("Parkinson's law of triviality".to_owned()); 46 | assert!(page.get_content().unwrap().contains("bike-shedding")); 47 | } 48 | 49 | #[test] 50 | fn title() { 51 | let wikipedia = w(); 52 | let page = wikipedia.page_from_title("Parkinson's law of triviality".to_owned()); 53 | assert_eq!( 54 | page.get_title().unwrap(), 55 | "Parkinson's law of triviality".to_owned() 56 | ); 57 | let page = wikipedia.page_from_pageid("4138548".to_owned()); 58 | assert_eq!(page.get_title().unwrap(), "Law of triviality".to_owned()); 59 | } 60 | 61 | #[test] 62 | fn pageid() { 63 | let wikipedia = w(); 64 | let page = wikipedia.page_from_title("Parkinson's law of triviality".to_owned()); 65 | assert_eq!(page.get_pageid().unwrap(), "4138548".to_owned()); 66 | let page = wikipedia.page_from_title("Bikeshedding".to_owned()); 67 | assert_eq!(page.get_pageid().unwrap(), "4138548".to_owned()); 68 | let page = wikipedia.page_from_pageid("4138548".to_owned()); 69 | assert_eq!(page.get_pageid().unwrap(), "4138548".to_owned()); 70 | } 71 | 72 | #[test] 73 | fn page_html_content() { 74 | let wikipedia = w(); 75 | let page = wikipedia.page_from_pageid("4138548".to_owned()); 76 | let html = page.get_html_content().unwrap(); 77 | assert!(html.contains("bike-shedding")); 78 | assert!(html.contains("<")); // it would not be html otherwise 79 | } 80 | 81 | #[test] 82 | fn page_summary() { 83 | let wikipedia = w(); 84 | let page = wikipedia.page_from_title("Parkinson's law of triviality".to_owned()); 85 | let summary = page.get_summary().unwrap(); 86 | let content = page.get_content().unwrap(); 87 | assert!(summary.contains("bike-shedding")); 88 | assert!(summary.len() < content.len()); 89 | } 90 | 91 | #[test] 92 | fn page_redirect_summary() { 93 | let wikipedia = w(); 94 | let page = wikipedia.page_from_title("Bikeshedding".to_owned()); 95 | let summary = page.get_summary().unwrap(); 96 | let content = page.get_content().unwrap(); 97 | assert!(summary.contains("bike-shedding")); 98 | assert!(summary.len() < content.len()); 99 | } 100 | 101 | #[test] 102 | fn page_images() { 103 | let mut wikipedia = w(); 104 | wikipedia.images_results = "5".to_owned(); 105 | let page = wikipedia.page_from_title("Argentina".to_owned()); 106 | let images = page.get_images().unwrap(); 107 | let mut c = 0; 108 | let mut set = HashSet::new(); 109 | for i in images { 110 | assert!(!i.title.is_empty()); 111 | assert!(!i.url.is_empty()); 112 | assert!(!i.description_url.is_empty()); 113 | c += 1; 114 | set.insert(i.title); 115 | if c == 11 { 
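// images_results is "5", so collecting 11 distinct titles proves the iterator transparently fetched several continuation batches.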
116 | break; 117 | } 118 | } 119 | assert_eq!(set.len(), 11); 120 | } 121 | 122 | #[test] 123 | fn coordinates() { 124 | let wikipedia = w(); 125 | let page = wikipedia.page_from_title("San Francisco".to_owned()); 126 | let (lat, lon) = page.get_coordinates().unwrap().unwrap(); 127 | assert!(lat > 0.0); 128 | assert!(lon < 0.0); 129 | } 130 | 131 | #[test] 132 | fn no_coordinates() { 133 | let wikipedia = w(); 134 | let page = wikipedia.page_from_title("Bikeshedding".to_owned()); 135 | assert!(page.get_coordinates().unwrap().is_none()); 136 | } 137 | 138 | #[test] 139 | fn references() { 140 | let mut wikipedia = w(); 141 | wikipedia.links_results = "3".to_owned(); 142 | let page = wikipedia.page_from_title("Argentina".to_owned()); 143 | let references = page.get_references().unwrap(); 144 | let mut c = 0; 145 | let mut set = HashSet::new(); 146 | for r in references { 147 | assert!(r.url.starts_with("http")); 148 | c += 1; 149 | set.insert(r.url); 150 | if c == 7 { 151 | break; 152 | } 153 | } 154 | assert_eq!(set.len(), 7); 155 | } 156 | 157 | #[test] 158 | fn links() { 159 | let mut wikipedia = w(); 160 | wikipedia.links_results = "3".to_owned(); 161 | let page = wikipedia.page_from_title("Argentina".to_owned()); 162 | let links = page.get_links().unwrap(); 163 | let mut c = 0; 164 | let mut set = HashSet::new(); 165 | for r in links { 166 | c += 1; 167 | set.insert(r.title); 168 | if c == 7 { 169 | break; 170 | } 171 | } 172 | assert_eq!(set.len(), 7); 173 | } 174 | 175 | #[test] 176 | fn langlinks() { 177 | let mut wikipedia = w(); 178 | wikipedia.links_results = "3".to_owned(); 179 | let page = wikipedia.page_from_title("Law of triviality".to_owned()); 180 | let langlinks = page.get_langlinks().unwrap().collect::<Vec<_>>(); 181 | assert_eq!( 182 | langlinks.iter().find(|ll| ll.lang == *"nl").unwrap().title, 183 | Some("Trivialiteitswet van Parkinson".into()), 184 | ); 185 | assert_eq!( 186 | langlinks.iter().find(|ll| ll.lang == *"fr").unwrap().title, 187 | Some("Loi de futilité de Parkinson".into()), 188 | ); 189 | } 190 | 191 | #[test] 192 | fn categories() { 193 | let mut wikipedia = w(); 194 | wikipedia.categories_results = "3".to_owned(); 195 | let page = wikipedia.page_from_title("Argentina".to_owned()); 196 | let categories = page.get_categories().unwrap(); 197 | let mut c = 0; 198 | let mut set = HashSet::new(); 199 | for ca in categories { 200 | c += 1; 201 | set.insert(ca.title); 202 | if c == 7 { 203 | break; 204 | } 205 | } 206 | assert_eq!(set.len(), 7); 207 | } 208 | 209 | #[test] 210 | fn sections() { 211 | let wikipedia = w(); 212 | let page = wikipedia.page_from_title("Bikeshedding".to_owned()); 213 | assert_eq!( 214 | page.get_sections().unwrap(), 215 | vec![ 216 | "Argument".to_owned(), 217 | "Related principles and formulations".to_owned(), 218 | "See also".to_owned(), 219 | "References".to_owned(), 220 | "Further reading".to_owned(), 221 | "External links".to_owned(), 222 | ] 223 | ) 224 | } 225 | 226 | #[test] 227 | fn sections2() { 228 | let wikipedia = w(); 229 | let page = wikipedia.page_from_pageid("4138548".to_owned()); 230 | assert_eq!( 231 | page.get_sections().unwrap(), 232 | vec![ 233 | "Argument".to_owned(), 234 | "Related principles and formulations".to_owned(), 235 | "See also".to_owned(), 236 | "References".to_owned(), 237 | "Further reading".to_owned(), 238 | "External links".to_owned(), 239 | ] 240 | ) 241 | } 242 | 243 | #[test] 244 | fn section_content() { 245 | let wikipedia = w(); 246 | let page = wikipedia.page_from_pageid("4138548".to_owned()); 247 | 
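// "Argument" is a top-level section of the Law of triviality article (pageid 4138548); see the sections tests above.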
assert!(page 248 | .get_section_content("Argument") 249 | .unwrap() 250 | .unwrap() 251 | .contains("reactor is so vastly expensive")) 252 | } 253 | 254 | #[test] 255 | fn languages() { 256 | let languages = w().get_languages().unwrap(); 257 | assert!(languages.contains(&("en".to_owned(), "English".to_owned()))); 258 | assert!(languages.contains(&("es".to_owned(), "español".to_owned()))); 259 | } 260 | } 261 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | //! Access wikipedia articles from Rust. 2 | //! 3 | //! # Examples 4 | //! ``` 5 | //! extern crate wikipedia; 6 | //! 7 | //! let wiki = wikipedia::Wikipedia::<wikipedia::http::default::Client>::default(); 8 | //! let page = wiki.page_from_title("Club Atletico River Plate".to_owned()); 9 | //! let content = page.get_content().unwrap(); 10 | //! assert!(content.contains("B Nacional")); 11 | //! ``` 12 | #[cfg(feature = "http-client")] 13 | extern crate reqwest; 14 | extern crate serde_json; 15 | extern crate thiserror; 16 | #[cfg(feature = "http-client")] 17 | extern crate url; 18 | 19 | use std::cmp::PartialEq; 20 | use std::io; 21 | use std::result; 22 | 23 | pub mod http; 24 | pub mod iter; 25 | pub use iter::{Iter, IterElems}; 26 | 27 | const LANGUAGE_URL_MARKER: &str = "{language}"; 28 | 29 | macro_rules! results { 30 | ($data: expr, $query_field: expr) => { 31 | // There has to be a better way to write the following code 32 | $data 33 | .as_object() 34 | .and_then(|x| x.get("query")) 35 | .and_then(|x| x.as_object()) 36 | .and_then(|x| x.get($query_field)) 37 | .and_then(|x| x.as_array()) 38 | .ok_or(Error::JSONPathError)? 39 | .into_iter() 40 | .filter_map(|i| { 41 | i.as_object() 42 | .and_then(|i| i.get("title")) 43 | .and_then(|s| s.as_str().map(|s| s.to_owned())) 44 | }) 45 | .collect() 46 | }; 47 | } 48 | 49 | macro_rules! 
cont { 50 | ($this: expr, $cont: expr, $($params: expr),*) => {{ 51 | let qp = $this.identifier.query_param(); 52 | let mut params = vec![ 53 | $($params),*, 54 | ("format", "json"), 55 | ("action", "query"), 56 | (&*qp.0, &*qp.1), 57 | ]; 58 | match *$cont { 59 | Some(ref v) => { 60 | for x in v.iter() { params.push((&*x.0, &*x.1)); } 61 | }, 62 | None => params.push(("continue", "")), 63 | } 64 | let q = $this.wikipedia.query(params.into_iter())?; 65 | 66 | let pages = q 67 | .as_object() 68 | .and_then(|x| x.get("query")) 69 | .and_then(|x| x.as_object()) 70 | .and_then(|x| x.get("pages")) 71 | .and_then(|x| x.as_object()) 72 | .ok_or(Error::JSONPathError)?; 73 | 74 | Ok((pages.values().cloned().collect(), $this.parse_cont(&q)?)) 75 | }} 76 | } 77 | 78 | /// Wikipedia failed to fetch some information 79 | #[derive(thiserror::Error, Debug)] 80 | pub enum Error { 81 | /// Malformed URL 82 | #[error("URL Error")] 83 | URLError, 84 | /// Some error communicating with the server 85 | #[error("HTTP Error")] 86 | HTTPError(#[from] Box<dyn std::error::Error + Send + Sync>), 87 | /// Error reading response 88 | #[error("IO Error: {0}")] 89 | IOError(#[from] io::Error), 90 | /// Failed to parse JSON response 91 | #[error("JSON Error: {0}")] 92 | JSONError(#[from] serde_json::error::Error), 93 | /// Missing required keys in the JSON response 94 | #[error("JSON Path Error")] 95 | JSONPathError, 96 | /// One of the parameters provided (identified by `String`) is invalid 97 | #[error("Invalid Parameter: {0}")] 98 | InvalidParameter(String), 99 | } 100 | 101 | pub type Result<T> = result::Result<T, Error>; 102 | 103 | #[derive(Debug)] 104 | pub struct Wikipedia<A: http::HttpClient> { 105 | /// HttpClient struct. 106 | pub client: A, 107 | /// Url is created by concatenating `pre_language_url` + `language` + `post_language_url`. 108 | pub pre_language_url: String, 109 | pub post_language_url: String, 110 | pub language: String, 111 | /// Number of results to fetch when searching. 112 | pub search_results: u32, 113 | /// Number of images to fetch in each request when calling `get_images`. 114 | /// The iterator will go through all of them, fetching pages of this size. 115 | /// It can be the string "max" to fetch as many as possible on every request. 116 | pub images_results: String, 117 | /// Like `images_results`, for links and references. 118 | pub links_results: String, 119 | /// Like `images_results`, for categories. 120 | pub categories_results: String, 121 | } 122 | 123 | impl<A: http::HttpClient + Default> Default for Wikipedia<A> { 124 | fn default() -> Self { 125 | Wikipedia::new(A::default()) 126 | } 127 | } 128 | 129 | impl<A: http::HttpClient + Clone> Clone for Wikipedia<A> { 130 | fn clone(&self) -> Self { 131 | Wikipedia { 132 | client: self.client.clone(), 133 | pre_language_url: self.pre_language_url.clone(), 134 | post_language_url: self.post_language_url.clone(), 135 | language: self.language.clone(), 136 | search_results: self.search_results, 137 | images_results: self.images_results.clone(), 138 | links_results: self.links_results.clone(), 139 | categories_results: self.categories_results.clone(), 140 | } 141 | } 142 | } 143 | 144 | impl<A: http::HttpClient> Wikipedia<A> { 145 | /// Creates a new object using the provided client and default values. 
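/// /// # Examples /// /// A minimal sketch using the bundled blocking client (available with the default `http-client` feature): /// /// ``` /// let client = wikipedia::http::default::Client::default(); /// let wiki = wikipedia::Wikipedia::new(client); /// assert_eq!(wiki.base_url(), "https://en.wikipedia.org/w/api.php"); /// ```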
146 | pub fn new(client: A) -> Self { 147 | Wikipedia { 148 | client, 149 | pre_language_url: "https://".to_owned(), 150 | post_language_url: ".wikipedia.org/w/api.php".to_owned(), 151 | language: "en".to_owned(), 152 | search_results: 10, 153 | images_results: "max".to_owned(), 154 | links_results: "max".to_owned(), 155 | categories_results: "max".to_owned(), 156 | } 157 | } 158 | 159 | /// Returns a list of languages in the form of (`identifier`, `language`), 160 | /// for example [("en", "English"), ("es", "español")] 161 | pub fn get_languages(&self) -> Result<Vec<(String, String)>> { 162 | let q = self.query( 163 | vec![ 164 | ("meta", "siteinfo"), 165 | ("siprop", "languages"), 166 | ("format", "json"), 167 | ("action", "query"), 168 | ] 169 | .into_iter(), 170 | )?; 171 | 172 | Ok(q.as_object() 173 | .and_then(|x| x.get("query")) 174 | .and_then(|x| x.as_object()) 175 | .and_then(|x| x.get("languages")) 176 | .and_then(|x| x.as_array()) 177 | .ok_or(Error::JSONPathError)? 178 | .iter() 179 | .filter_map(|x| { 180 | let o = x.as_object(); 181 | Some(( 182 | match o 183 | .and_then(|x| x.get("code")) 184 | .and_then(|x| x.as_str()) 185 | .map(|x| x.to_owned()) 186 | { 187 | Some(v) => v, 188 | None => return None, 189 | }, 190 | match o 191 | .and_then(|x| x.get("*")) 192 | .and_then(|x| x.as_str()) 193 | .map(|x| x.to_owned()) 194 | { 195 | Some(v) => v, 196 | None => return None, 197 | }, 198 | )) 199 | }) 200 | .collect()) 201 | } 202 | 203 | /// Returns the api url 204 | pub fn base_url(&self) -> String { 205 | format!( 206 | "{}{}{}", 207 | self.pre_language_url, self.language, self.post_language_url 208 | ) 209 | } 210 | 211 | /// Updates the url format. The substring `{language}` will be replaced 212 | /// with the selected language. 213 | pub fn set_base_url(&mut self, base_url: &str) { 214 | let index = match base_url.find(LANGUAGE_URL_MARKER) { 215 | Some(i) => i, 216 | None => { 217 | self.pre_language_url = base_url.to_owned(); 218 | self.language = "".to_owned(); 219 | self.post_language_url = "".to_owned(); 220 | return; 221 | } 222 | }; 223 | self.pre_language_url = base_url[0..index].to_owned(); 224 | self.post_language_url = base_url[index + LANGUAGE_URL_MARKER.len()..].to_owned(); 225 | } 226 | 227 | fn query<'a, I>(&self, args: I) -> Result<serde_json::Value> 228 | where 229 | I: Iterator<Item = (&'a str, &'a str)>, 230 | { 231 | let response_str = self.client.get(&self.base_url(), args)?; 232 | let json = serde_json::from_str(&response_str).map_err(Error::JSONError)?; 233 | Ok(json) 234 | } 235 | 236 | /// Searches for a string and returns a list of relevant page titles. 237 | /// 238 | /// # Examples 239 | /// 240 | /// ``` 241 | /// extern crate wikipedia; 242 | /// 243 | /// let wiki = wikipedia::Wikipedia::<wikipedia::http::default::Client>::default(); 244 | /// let results = wiki.search("keyboard").unwrap(); 245 | /// assert!(results.contains(&"Computer keyboard".to_owned())); 246 | /// ``` 247 | pub fn search(&self, query: &str) -> Result<Vec<String>> { 248 | let results = &*format!("{}", self.search_results); 249 | let data = self.query( 250 | vec![ 251 | ("list", "search"), 252 | ("srprop", ""), 253 | ("srlimit", results), 254 | ("srsearch", query), 255 | ("format", "json"), 256 | ("action", "query"), 257 | ] 258 | .into_iter(), 259 | )?; 260 | 261 | Ok(results!(data, "search")) 262 | } 263 | 264 | /// Search articles within `radius` meters of `latitude` and `longitude`. 
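/// `latitude` must lie in [-90, 90], `longitude` in [-180, 180], and `radius` in [10, 10000] meters; anything else returns `Error::InvalidParameter`.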
265 | /// 266 | /// # Examples 267 | /// 268 | /// ``` 269 | /// extern crate wikipedia; 270 | /// 271 | /// let wiki = wikipedia::Wikipedia::::default(); 272 | /// let results = wiki.geosearch(40.750556,-73.993611, 20).unwrap(); 273 | /// assert!(results.contains(&"Madison Square Garden".to_owned())); 274 | /// ``` 275 | pub fn geosearch(&self, latitude: f64, longitude: f64, radius: u16) -> Result> { 276 | #![allow(clippy::manual_range_contains)] 277 | 278 | if latitude < -90.0 || latitude > 90.0 { 279 | return Err(Error::InvalidParameter("latitude".to_string())); 280 | } 281 | if longitude < -180.0 || longitude > 180.0 { 282 | return Err(Error::InvalidParameter("longitude".to_string())); 283 | } 284 | if radius < 10 || radius > 10000 { 285 | return Err(Error::InvalidParameter("radius".to_string())); 286 | } 287 | let results = &*format!("{}", self.search_results); 288 | let data = self.query( 289 | vec![ 290 | ("list", "geosearch"), 291 | ("gsradius", &*format!("{}", radius)), 292 | ("gscoord", &*format!("{}|{}", latitude, longitude)), 293 | ("gslimit", results), 294 | ("format", "json"), 295 | ("action", "query"), 296 | ] 297 | .into_iter(), 298 | )?; 299 | Ok(results!(data, "geosearch")) 300 | } 301 | 302 | /// Fetches `count` random articles' title. 303 | pub fn random_count(&self, count: u8) -> Result> { 304 | let data = self.query( 305 | vec![ 306 | ("list", "random"), 307 | ("rnnamespace", "0"), 308 | ("rnlimit", &*format!("{}", count)), 309 | ("format", "json"), 310 | ("action", "query"), 311 | ] 312 | .into_iter(), 313 | )?; 314 | let r: Vec = results!(data, "random"); 315 | Ok(r) 316 | } 317 | 318 | /// Fetches a random article's title. 319 | pub fn random(&self) -> Result> { 320 | Ok(self.random_count(1)?.into_iter().next()) 321 | } 322 | 323 | /// Creates a new `Page` given a `title`. 324 | pub fn page_from_title(&self, title: String) -> Page<'_, A> { 325 | Page::from_title(self, title) 326 | } 327 | 328 | /// Creates a new `Page` given a `pageid`. 329 | pub fn page_from_pageid(&self, pageid: String) -> Page<'_, A> { 330 | Page::from_pageid(self, pageid) 331 | } 332 | } 333 | 334 | #[derive(Debug)] 335 | enum TitlePageId { 336 | Title(String), 337 | PageId(String), 338 | } 339 | 340 | impl TitlePageId { 341 | fn query_param(&self) -> (String, String) { 342 | match *self { 343 | TitlePageId::Title(ref s) => ("titles".to_owned(), s.clone()), 344 | TitlePageId::PageId(ref s) => ("pageids".to_owned(), s.clone()), 345 | } 346 | } 347 | } 348 | 349 | #[derive(Debug)] 350 | pub struct Page<'a, A: 'a + http::HttpClient> { 351 | wikipedia: &'a Wikipedia, 352 | identifier: TitlePageId, 353 | } 354 | 355 | /// A wikipedia article. 356 | impl<'a, A: http::HttpClient> Page<'a, A> { 357 | /// Creates a new `Page` given a `title`. 358 | pub fn from_title(wikipedia: &'a Wikipedia, title: String) -> Page<'a, A> { 359 | Page { 360 | wikipedia, 361 | identifier: TitlePageId::Title(title), 362 | } 363 | } 364 | 365 | /// Creates a new `Page` given a `pageid`. 366 | pub fn from_pageid(wikipedia: &'a Wikipedia, pageid: String) -> Page<'a, A> { 367 | Page { 368 | wikipedia, 369 | identifier: TitlePageId::PageId(pageid), 370 | } 371 | } 372 | 373 | /// Gets the `Page`'s `pageid`. 
374 | pub fn get_pageid(&self) -> Result<String> { 375 | match self.identifier { 376 | TitlePageId::PageId(ref s) => Ok(s.clone()), 377 | TitlePageId::Title(_) => { 378 | let qp = self.identifier.query_param(); 379 | let q = self.wikipedia.query( 380 | vec![ 381 | ("prop", "info|pageprops"), 382 | ("inprop", "url"), 383 | ("ppprop", "disambiguation"), 384 | ("redirects", ""), 385 | ("format", "json"), 386 | ("action", "query"), 387 | (&*qp.0, &*qp.1), 388 | ] 389 | .into_iter(), 390 | )?; 391 | 392 | if let Some(r) = self.redirect(&q) { 393 | return Page::from_title(self.wikipedia, r).get_pageid(); 394 | } 395 | let pages = q 396 | .as_object() 397 | .and_then(|x| x.get("query")) 398 | .and_then(|x| x.as_object()) 399 | .and_then(|x| x.get("pages")) 400 | .and_then(|x| x.as_object()) 401 | .ok_or(Error::JSONPathError)?; 402 | pages.keys().next().cloned().ok_or(Error::JSONPathError) 403 | } 404 | } 405 | } 406 | 407 | /// Gets the `Page`'s `title`. 408 | pub fn get_title(&self) -> Result<String> { 409 | match self.identifier { 410 | TitlePageId::Title(ref s) => Ok(s.clone()), 411 | TitlePageId::PageId(_) => { 412 | let qp = self.identifier.query_param(); 413 | let q = self.wikipedia.query( 414 | vec![ 415 | ("prop", "info|pageprops"), 416 | ("inprop", "url"), 417 | ("ppprop", "disambiguation"), 418 | ("redirects", ""), 419 | ("format", "json"), 420 | ("action", "query"), 421 | (&*qp.0, &*qp.1), 422 | ] 423 | .into_iter(), 424 | )?; 425 | 426 | if let Some(r) = self.redirect(&q) { 427 | return Ok(r); 428 | } 429 | let pages = q 430 | .as_object() 431 | .and_then(|x| x.get("query")) 432 | .and_then(|x| x.as_object()) 433 | .and_then(|x| x.get("pages")) 434 | .and_then(|x| x.as_object()) 435 | .ok_or(Error::JSONPathError)?; 436 | let page = match pages.values().next() { 437 | Some(p) => p, 438 | None => return Err(Error::JSONPathError), 439 | }; 440 | Ok(page 441 | .as_object() 442 | .and_then(|x| x.get("title")) 443 | .and_then(|x| x.as_str()) 444 | .ok_or(Error::JSONPathError)? 445 | .to_owned()) 446 | } 447 | } 448 | } 449 | 450 | /// If the `Page` redirects to another one it returns its title, otherwise 451 | /// returns None. 452 | fn redirect(&self, q: &serde_json::Value) -> Option<String> { 453 | q.as_object() 454 | .and_then(|x| x.get("query")) 455 | .and_then(|x| x.as_object()) 456 | .and_then(|x| x.get("redirects")) 457 | .and_then(|x| x.as_array()) 458 | .and_then(|x| x.iter().next()) 459 | .and_then(|x| x.as_object()) 460 | .and_then(|x| x.get("to")) 461 | .and_then(|x| x.as_str()) 462 | .map(|x| x.to_owned()) 463 | } 464 | 465 | /// Given a parsed response, usually we access the first page with the data 466 | fn get_first_page<'parsed>( 467 | &self, 468 | data: &'parsed serde_json::Value, 469 | ) -> Option<&'parsed serde_json::Value> { 470 | let pages = data 471 | .as_object() 472 | .and_then(|x| x.get("query")) 473 | .and_then(|x| x.as_object()) 474 | .and_then(|x| x.get("pages")) 475 | .and_then(|x| x.as_object()); 476 | let pageid = pages?.keys().next()?; 477 | pages.unwrap().get(pageid) 478 | } 479 | 480 | /// Gets the plain text content of the article. 
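/// The extract is requested with `explaintext`, so wiki markup and HTML are stripped from the result.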
481 | pub fn get_content(&self) -> Result<String> { 482 | let qp = self.identifier.query_param(); 483 | let q = self.wikipedia.query( 484 | vec![ 485 | ("prop", "extracts|revisions"), 486 | ("explaintext", ""), 487 | ("rvprop", "ids"), 488 | ("redirects", ""), 489 | ("format", "json"), 490 | ("action", "query"), 491 | (&*qp.0, &*qp.1), 492 | ] 493 | .into_iter(), 494 | )?; 495 | 496 | if let Some(r) = self.redirect(&q) { 497 | return Page::from_title(self.wikipedia, r).get_content(); 498 | } 499 | 500 | Ok(self 501 | .get_first_page(&q) 502 | .and_then(|x| x.as_object()) 503 | .and_then(|x| x.get("extract")) 504 | .and_then(|x| x.as_str()) 505 | .ok_or(Error::JSONPathError)? 506 | .to_owned()) 507 | } 508 | 509 | /// Gets the html content of the article. 510 | pub fn get_html_content(&self) -> Result<String> { 511 | let qp = self.identifier.query_param(); 512 | let q = self.wikipedia.query( 513 | vec![ 514 | ("prop", "revisions"), 515 | ("rvprop", "content"), 516 | ("rvlimit", "1"), 517 | ("rvparse", ""), 518 | ("redirects", ""), 519 | ("format", "json"), 520 | ("action", "query"), 521 | (&*qp.0, &*qp.1), 522 | ] 523 | .into_iter(), 524 | )?; 525 | 526 | if let Some(r) = self.redirect(&q) { 527 | return Page::from_title(self.wikipedia, r).get_html_content(); 528 | } 529 | 530 | Ok(self 531 | .get_first_page(&q) 532 | .and_then(|x| x.as_object()) 533 | .and_then(|x| x.get("revisions")) 534 | .and_then(|x| x.as_array()) 535 | .and_then(|x| x.iter().next()) 536 | .and_then(|x| x.as_object()) 537 | .and_then(|x| x.get("*")) 538 | .and_then(|x| x.as_str()) 539 | .ok_or(Error::JSONPathError)? 540 | .to_owned()) 541 | } 542 | 543 | /// Gets a summary of the article. 544 | pub fn get_summary(&self) -> Result<String> { 545 | let qp = self.identifier.query_param(); 546 | let q = self.wikipedia.query( 547 | vec![ 548 | ("prop", "extracts"), 549 | ("explaintext", ""), 550 | ("exintro", ""), 551 | ("redirects", ""), 552 | ("format", "json"), 553 | ("action", "query"), 554 | (&*qp.0, &*qp.1), 555 | ] 556 | .into_iter(), 557 | )?; 558 | 559 | if let Some(r) = self.redirect(&q) { 560 | return Page::from_title(self.wikipedia, r).get_summary(); 561 | } 562 | 563 | Ok(self 564 | .get_first_page(&q) 565 | .and_then(|x| x.as_object()) 566 | .and_then(|x| x.get("extract")) 567 | .and_then(|x| x.as_str()) 568 | .ok_or(Error::JSONPathError)? 569 | .to_owned()) 570 | } 571 | 572 | /// Receives a JSON object and extracts any `continue` parameters to be 573 | /// used when browsing subsequent pages. 
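/// Numbers and booleans are serialized back to strings so they can be resent verbatim as query parameters on the next request.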
574 | fn parse_cont(&self, q: &serde_json::Value) -> Result<Option<IterElems>> { 575 | let cont = match q 576 | .as_object() 577 | .and_then(|x| x.get("continue")) 578 | .and_then(|x| x.as_object()) 579 | { 580 | Some(v) => v, 581 | None => return Ok(None), 582 | }; 583 | let mut cont_v = vec![]; 584 | for (k, v) in cont.into_iter() { 585 | let value = match *v { 586 | serde_json::Value::Null => "".to_owned(), 587 | serde_json::Value::Bool(b) => if b { "1" } else { "0" }.to_owned(), 588 | serde_json::Value::Number(ref f) => format!("{}", f), 589 | serde_json::Value::String(ref s) => s.clone(), 590 | _ => return Err(Error::JSONPathError), 591 | }; 592 | cont_v.push((k.clone(), value)); 593 | } 594 | Ok(Some(cont_v)) 595 | } 596 | 597 | fn request_images( 598 | &self, 599 | cont: &Option<IterElems>, 600 | ) -> Result<(Vec<serde_json::Value>, Option<IterElems>)> { 601 | cont!( 602 | self, 603 | cont, 604 | ("generator", "images"), 605 | ("gimlimit", &*self.wikipedia.images_results), 606 | ("prop", "imageinfo"), 607 | ("iiprop", "url") 608 | ) 609 | } 610 | 611 | /// Creates an iterator to view all images in the `Page`. 612 | pub fn get_images(&self) -> Result<Iter<A, iter::Image>> { 613 | Iter::new(self) 614 | } 615 | 616 | fn request_extlinks( 617 | &self, 618 | cont: &Option<IterElems>, 619 | ) -> Result<(Vec<serde_json::Value>, Option<IterElems>)> { 620 | let a: Result<(Vec<serde_json::Value>, _)> = cont!( 621 | self, 622 | cont, 623 | ("prop", "extlinks"), 624 | ("ellimit", &*self.wikipedia.links_results) 625 | ); 626 | a.map(|(pages, cont)| { 627 | let page = match pages.into_iter().next() { 628 | Some(p) => p, 629 | None => return (Vec::new(), None), 630 | }; 631 | ( 632 | page.as_object() 633 | .and_then(|x| x.get("extlinks")) 634 | .and_then(|x| x.as_array()) 635 | .map(|x| x.to_vec()) 636 | .unwrap_or(Vec::new()), 637 | cont, 638 | ) 639 | }) 640 | } 641 | 642 | /// Creates an iterator to view all references (external links) in the `Page`. 643 | pub fn get_references(&self) -> Result<Iter<A, iter::Reference>> { 644 | Iter::new(self) 645 | } 646 | 647 | fn request_links( 648 | &self, 649 | cont: &Option<IterElems>, 650 | ) -> Result<(Vec<serde_json::Value>, Option<IterElems>)> { 651 | let a: Result<(Vec<serde_json::Value>, _)> = cont!( 652 | self, 653 | cont, 654 | ("prop", "links"), 655 | ("plnamespace", "0"), 656 | ("ellimit", &*self.wikipedia.links_results) 657 | ); 658 | a.map(|(pages, cont)| { 659 | let page = match pages.into_iter().next() { 660 | Some(p) => p, 661 | None => return (Vec::new(), None), 662 | }; 663 | ( 664 | page.as_object() 665 | .and_then(|x| x.get("links")) 666 | .and_then(|x| x.as_array()) 667 | .map(|x| x.to_vec()) 668 | .unwrap_or(Vec::new()), 669 | cont, 670 | ) 671 | }) 672 | } 673 | 674 | /// Creates an iterator to view all internal links in the `Page`. 675 | pub fn get_links(&self) -> Result<Iter<A, iter::Link>> { 676 | Iter::new(self) 677 | } 678 | 679 | fn request_categories( 680 | &self, 681 | cont: &Option<IterElems>, 682 | ) -> Result<(Vec<serde_json::Value>, Option<IterElems>)> { 683 | let a: Result<(Vec<serde_json::Value>, _)> = cont!( 684 | self, 685 | cont, 686 | ("prop", "categories"), 687 | ("cllimit", &*self.wikipedia.categories_results) 688 | ); 689 | a.map(|(pages, cont)| { 690 | let page = match pages.into_iter().next() { 691 | Some(p) => p, 692 | None => return (Vec::new(), None), 693 | }; 694 | ( 695 | page.as_object() 696 | .and_then(|x| x.get("categories")) 697 | .and_then(|x| x.as_array()) 698 | .map(|x| x.to_vec()) 699 | .unwrap_or(Vec::new()), 700 | cont, 701 | ) 702 | }) 703 | } 704 | 705 | /// Creates an iterator to view all categories of the `Page`. 
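/// Categories are fetched lazily in batches of `categories_results` as the iterator advances.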
706 | pub fn get_categories(&self) -> Result<Iter<A, iter::Category>> { 707 | Iter::new(self) 708 | } 709 | 710 | fn request_langlinks( 711 | &self, 712 | cont: &Option<IterElems>, 713 | ) -> Result<(Vec<serde_json::Value>, Option<IterElems>)> { 714 | let a: Result<(Vec<serde_json::Value>, _)> = cont!( 715 | self, 716 | cont, 717 | ("prop", "langlinks"), 718 | ("lllimit", &*self.wikipedia.links_results) 719 | ); 720 | a.map(|(pages, cont)| { 721 | let page = match pages.into_iter().next() { 722 | Some(p) => p, 723 | None => return (Vec::new(), None), 724 | }; 725 | ( 726 | page.as_object() 727 | .and_then(|x| x.get("langlinks")) 728 | .and_then(|x| x.as_array()) 729 | .map(|x| x.to_vec()) 730 | .unwrap_or(Vec::new()), 731 | cont, 732 | ) 733 | }) 734 | } 735 | 736 | /// Creates an iterator to view all langlinks of the `Page`. 737 | /// This iterates over the page titles in all available languages. 738 | pub fn get_langlinks(&self) -> Result<Iter<A, iter::LangLink>> { 739 | Iter::new(self) 740 | } 741 | 742 | /// Returns the latitude and longitude associated to the `Page` if any. 743 | pub fn get_coordinates(&self) -> Result<Option<(f64, f64)>> { 744 | let qp = self.identifier.query_param(); 745 | let params = vec![ 746 | ("prop", "coordinates"), 747 | ("colimit", "max"), 748 | ("redirects", ""), 749 | ("format", "json"), 750 | ("action", "query"), 751 | (&*qp.0, &*qp.1), 752 | ]; 753 | let q = self.wikipedia.query(params.into_iter())?; 754 | 755 | if let Some(r) = self.redirect(&q) { 756 | return Page::from_title(self.wikipedia, r).get_coordinates(); 757 | } 758 | 759 | let coord = match self 760 | .get_first_page(&q) 761 | .and_then(|x| x.as_object()) 762 | .and_then(|x| x.get("coordinates")) 763 | .and_then(|x| x.as_array()) 764 | .and_then(|x| x.iter().next()) 765 | .and_then(|x| x.as_object()) 766 | { 767 | Some(c) => c, 768 | None => return Ok(None), 769 | }; 770 | Ok(Some(( 771 | coord 772 | .get("lat") 773 | .and_then(|x| x.as_f64()) 774 | .ok_or(Error::JSONPathError)?, 775 | coord 776 | .get("lon") 777 | .and_then(|x| x.as_f64()) 778 | .ok_or(Error::JSONPathError)?, 779 | ))) 780 | } 781 | 782 | /// Fetches all sections of the article. 783 | pub fn get_sections(&self) -> Result<Vec<String>> { 784 | let pageid = self.get_pageid()?; 785 | let params = vec![ 786 | ("prop", "sections"), 787 | ("format", "json"), 788 | ("action", "parse"), 789 | ("pageid", &*pageid), 790 | ]; 791 | let q = self.wikipedia.query(params.into_iter())?; 792 | 793 | Ok(q.as_object() 794 | .and_then(|x| x.get("parse")) 795 | .and_then(|x| x.as_object()) 796 | .and_then(|x| x.get("sections")) 797 | .and_then(|x| x.as_array()) 798 | .ok_or(Error::JSONPathError)? 799 | .iter() 800 | .filter_map(|x| { 801 | x.as_object() 802 | .and_then(|x| x.get("line")) 803 | .and_then(|x| x.as_str()) 804 | .map(|x| x.to_owned()) 805 | }) 806 | .collect()) 807 | } 808 | 809 | /// Fetches the content of a section. 
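/// The section is located by scanning the plain-text extract for a `== Title ==` heading, so only top-level sections are found; `Ok(None)` is returned when the heading is missing.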
810 | pub fn get_section_content(&self, title: &str) -> Result<Option<String>> { 811 | let headr = format!("== {} ==", title); 812 | let content = self.get_content()?; 813 | let index = match content.find(&*headr) { 814 | Some(i) => headr.len() + i, 815 | None => return Ok(None), 816 | }; 817 | let end = match content[index..].find("==") { 818 | Some(i) => index + i, 819 | None => content.len(), 820 | }; 821 | Ok(Some(content[index..end].to_owned())) 822 | } 823 | } 824 | 825 | impl<'a, A: http::HttpClient> PartialEq<Page<'a, A>> for Page<'a, A> { 826 | fn eq(&self, other: &Page<A>) -> bool { 827 | match self.identifier { 828 | TitlePageId::Title(ref t1) => match other.identifier { 829 | TitlePageId::Title(ref t2) => t1 == t2, 830 | TitlePageId::PageId(_) => false, 831 | }, 832 | TitlePageId::PageId(ref p1) => match other.identifier { 833 | TitlePageId::Title(_) => false, 834 | TitlePageId::PageId(ref p2) => p1 == p2, 835 | }, 836 | } 837 | } 838 | } 839 | 840 | #[cfg(test)] 841 | mod test { 842 | use super::http::HttpClient; 843 | use super::iter; 844 | use super::Wikipedia; 845 | use std::sync::Mutex; 846 | 847 | const DEFAULT_AGENT: &str = "wikipedia (https://github.com/seppo0010/wikipedia-rs)"; 848 | 849 | struct MockClient { 850 | pub url: Mutex<Vec<String>>, 851 | pub user_agent: Option<String>, 852 | pub bearer_token: Option<String>, 853 | pub arguments: Mutex<Vec<Vec<(String, String)>>>, 854 | pub response: Mutex<Vec<String>>, 855 | } 856 | 857 | impl Default for MockClient { 858 | fn default() -> Self { 859 | MockClient { 860 | url: Mutex::new(Vec::new()), 861 | user_agent: Some(DEFAULT_AGENT.into()), 862 | bearer_token: None, 863 | arguments: Mutex::new(Vec::new()), 864 | response: Mutex::new(Vec::new()), 865 | } 866 | } 867 | } 868 | 869 | impl super::http::HttpClient for MockClient { 870 | fn user_agent(&mut self, user_agent: String) { 871 | self.user_agent = Some(user_agent) 872 | } 873 | 874 | fn bearer_token(&mut self, bearer_token: String) { 875 | self.bearer_token = Some(bearer_token) 876 | } 877 | 878 | fn get<'a, I>(&self, base_url: &str, args: I) -> Result<String, super::Error> 879 | where 880 | I: Iterator<Item = (&'a str, &'a str)>, 881 | { 882 | self.url.lock().unwrap().push(base_url.to_owned()); 883 | self.arguments 884 | .lock() 885 | .unwrap() 886 | .push(args.map(|x| (x.0.to_owned(), x.1.to_owned())).collect()); 887 | Ok(self.response.lock().unwrap().remove(0)) 888 | } 889 | } 890 | 891 | #[test] 892 | fn base_url() { 893 | let mut wikipedia = Wikipedia::<MockClient>::default(); 894 | assert_eq!(wikipedia.base_url(), "https://en.wikipedia.org/w/api.php"); 895 | wikipedia.language = "es".to_owned(); 896 | assert_eq!(wikipedia.base_url(), "https://es.wikipedia.org/w/api.php"); 897 | 898 | wikipedia.set_base_url("https://hello.{language}.world/"); 899 | assert_eq!(wikipedia.base_url(), "https://hello.es.world/"); 900 | 901 | wikipedia.set_base_url("https://hello.world/"); 902 | assert_eq!(wikipedia.base_url(), "https://hello.world/"); 903 | } 904 | 905 | #[test] 906 | fn user_agent() { 907 | let mut wikipedia = Wikipedia::<MockClient>::default(); 908 | wikipedia 909 | .client 910 | .response 911 | .lock() 912 | .unwrap() 913 | .push("{}".to_owned()); 914 | wikipedia.search("hello world").unwrap_err(); 915 | assert_eq!(&*wikipedia.client.user_agent.unwrap(), DEFAULT_AGENT); 916 | 917 | let mut client = MockClient::default(); 918 | client.user_agent("hello world".to_owned()); 919 | client.response.lock().unwrap().push("{}".to_owned()); 920 | wikipedia.client = client; 921 | wikipedia.search("hello world").unwrap_err(); 922 | assert_eq!(&*wikipedia.client.user_agent.unwrap(), "hello world"); 923 | } 924 | 925 | #[test] 926 | fn 
search() { 927 | let wikipedia = Wikipedia::<MockClient>::default(); 928 | wikipedia.client.response.lock().unwrap().push( 929 | "{\"query\":{\"search\":[{\"title\":\"hello\"}, {\"title\":\"world\"}]}}".to_owned(), 930 | ); 931 | assert_eq!( 932 | wikipedia.search("hello world").unwrap(), 933 | vec!["hello".to_owned(), "world".to_owned(),] 934 | ); 935 | assert_eq!( 936 | *wikipedia.client.url.lock().unwrap(), 937 | vec!["https://en.wikipedia.org/w/api.php".to_owned()] 938 | ); 939 | assert_eq!( 940 | *wikipedia.client.arguments.lock().unwrap(), 941 | vec![vec![ 942 | ("list".to_owned(), "search".to_owned()), 943 | ("srprop".to_owned(), "".to_owned()), 944 | ("srlimit".to_owned(), "10".to_owned()), 945 | ("srsearch".to_owned(), "hello world".to_owned()), 946 | ("format".to_owned(), "json".to_owned()), 947 | ("action".to_owned(), "query".to_owned()) 948 | ]] 949 | ); 950 | } 951 | 952 | #[test] 953 | fn geosearch() { 954 | let wikipedia = Wikipedia::<MockClient>::default(); 955 | wikipedia.client.response.lock().unwrap().push( 956 | "{\"query\":{\"geosearch\":[{\"title\":\"hello\"}, {\"title\":\"world\"}]}}".to_owned(), 957 | ); 958 | assert_eq!( 959 | wikipedia.geosearch(-34.603333, -58.381667, 10).unwrap(), 960 | vec!["hello".to_owned(), "world".to_owned(),] 961 | ); 962 | assert_eq!( 963 | *wikipedia.client.url.lock().unwrap(), 964 | vec!["https://en.wikipedia.org/w/api.php".to_owned()] 965 | ); 966 | assert_eq!( 967 | *wikipedia.client.arguments.lock().unwrap(), 968 | vec![vec![ 969 | ("list".to_owned(), "geosearch".to_owned()), 970 | ("gsradius".to_owned(), "10".to_owned()), 971 | ("gscoord".to_owned(), "-34.603333|-58.381667".to_owned()), 972 | ("gslimit".to_owned(), "10".to_owned()), 973 | ("format".to_owned(), "json".to_owned()), 974 | ("action".to_owned(), "query".to_owned()) 975 | ]] 976 | ); 977 | } 978 | 979 | #[test] 980 | fn random_count() { 981 | let wikipedia = Wikipedia::<MockClient>::default(); 982 | wikipedia.client.response.lock().unwrap().push( 983 | "{\"query\":{\"random\":[{\"title\":\"hello\"}, {\"title\":\"world\"}]}}".to_owned(), 984 | ); 985 | assert_eq!( 986 | wikipedia.random_count(10).unwrap(), 987 | vec!["hello".to_owned(), "world".to_owned(),] 988 | ); 989 | assert_eq!( 990 | *wikipedia.client.url.lock().unwrap(), 991 | vec!["https://en.wikipedia.org/w/api.php".to_owned()] 992 | ); 993 | assert_eq!( 994 | *wikipedia.client.arguments.lock().unwrap(), 995 | vec![vec![ 996 | ("list".to_owned(), "random".to_owned()), 997 | ("rnnamespace".to_owned(), "0".to_owned()), 998 | ("rnlimit".to_owned(), "10".to_owned()), 999 | ("format".to_owned(), "json".to_owned()), 1000 | ("action".to_owned(), "query".to_owned()) 1001 | ]] 1002 | ); 1003 | } 1004 | 1005 | #[test] 1006 | fn random() { 1007 | let wikipedia = Wikipedia::<MockClient>::default(); 1008 | wikipedia.client.response.lock().unwrap().push( 1009 | "{\"query\":{\"random\":[{\"title\":\"hello\"}, {\"title\":\"world\"}]}}".to_owned(), 1010 | ); 1011 | assert_eq!(wikipedia.random().unwrap(), Some("hello".to_owned())); 1012 | assert_eq!( 1013 | *wikipedia.client.url.lock().unwrap(), 1014 | vec!["https://en.wikipedia.org/w/api.php".to_owned()] 1015 | ); 1016 | assert_eq!( 1017 | *wikipedia.client.arguments.lock().unwrap(), 1018 | vec![vec![ 1019 | ("list".to_owned(), "random".to_owned()), 1020 | ("rnnamespace".to_owned(), "0".to_owned()), 1021 | ("rnlimit".to_owned(), "1".to_owned()), 1022 | ("format".to_owned(), "json".to_owned()), 1023 | ("action".to_owned(), "query".to_owned()) 1024 | ]] 1025 | ); 1026 | } 1027 | 1028 | #[test] 1029 | fn page_content() { 1030 | 
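// The mock client replays a canned extracts payload, so this exercises query building and JSON handling without any network access.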
let wikipedia = Wikipedia::<MockClient>::default(); 1031 | wikipedia 1032 | .client 1033 | .response 1034 | .lock() 1035 | .unwrap() 1036 | .push("{\"query\":{\"pages\":{\"a\":{\"extract\":\"hello\"}}}}".to_owned()); 1037 | let page = wikipedia.page_from_pageid("4138548".to_owned()); 1038 | let html = page.get_content().unwrap(); 1039 | assert_eq!(html, "hello".to_owned()); 1040 | assert_eq!( 1041 | *wikipedia.client.url.lock().unwrap(), 1042 | vec!["https://en.wikipedia.org/w/api.php".to_owned()] 1043 | ); 1044 | assert_eq!( 1045 | *wikipedia.client.arguments.lock().unwrap(), 1046 | vec![vec![ 1047 | ("prop".to_owned(), "extracts|revisions".to_owned()), 1048 | ("explaintext".to_owned(), "".to_owned()), 1049 | ("rvprop".to_owned(), "ids".to_owned()), 1050 | ("redirects".to_owned(), "".to_owned()), 1051 | ("format".to_owned(), "json".to_owned()), 1052 | ("action".to_owned(), "query".to_owned()), 1053 | ("pageids".to_owned(), "4138548".to_owned()), 1054 | ]] 1055 | ); 1056 | } 1057 | 1058 | #[test] 1059 | fn page_html_content() { 1060 | let wikipedia = Wikipedia::<MockClient>::default(); 1061 | wikipedia 1062 | .client 1063 | .response 1064 | .lock() 1065 | .unwrap() 1066 | .push("{\"query\":{\"pages\":{\"a\":{\"revisions\":[{\"*\":\"hello\"}]}}}}".to_owned()); 1067 | let page = wikipedia.page_from_pageid("4138548".to_owned()); 1068 | let html = page.get_html_content().unwrap(); 1069 | assert_eq!(html, "hello".to_owned()); 1070 | assert_eq!( 1071 | *wikipedia.client.url.lock().unwrap(), 1072 | vec!["https://en.wikipedia.org/w/api.php".to_owned()] 1073 | ); 1074 | assert_eq!( 1075 | *wikipedia.client.arguments.lock().unwrap(), 1076 | vec![vec![ 1077 | ("prop".to_owned(), "revisions".to_owned()), 1078 | ("rvprop".to_owned(), "content".to_owned()), 1079 | ("rvlimit".to_owned(), "1".to_owned()), 1080 | ("rvparse".to_owned(), "".to_owned()), 1081 | ("redirects".to_owned(), "".to_owned()), 1082 | ("format".to_owned(), "json".to_owned()), 1083 | ("action".to_owned(), "query".to_owned()), 1084 | ("pageids".to_owned(), "4138548".to_owned()), 1085 | ]] 1086 | ); 1087 | } 1088 | 1089 | #[test] 1090 | fn page_summary() { 1091 | let wikipedia = Wikipedia::<MockClient>::default(); 1092 | wikipedia 1093 | .client 1094 | .response 1095 | .lock() 1096 | .unwrap() 1097 | .push("{\"query\":{\"pages\":{\"a\":{\"extract\":\"hello\"}}}}".to_owned()); 1098 | let page = wikipedia.page_from_title("Parkinson's law of triviality".to_owned()); 1099 | let summary = page.get_summary().unwrap(); 1100 | assert_eq!(summary, "hello".to_owned()); 1101 | assert_eq!( 1102 | *wikipedia.client.url.lock().unwrap(), 1103 | vec!["https://en.wikipedia.org/w/api.php".to_owned()] 1104 | ); 1105 | assert_eq!( 1106 | *wikipedia.client.arguments.lock().unwrap(), 1107 | vec![vec![ 1108 | ("prop".to_owned(), "extracts".to_owned()), 1109 | ("explaintext".to_owned(), "".to_owned()), 1110 | ("exintro".to_owned(), "".to_owned()), 1111 | ("redirects".to_owned(), "".to_owned()), 1112 | ("format".to_owned(), "json".to_owned()), 1113 | ("action".to_owned(), "query".to_owned()), 1114 | ( 1115 | "titles".to_owned(), 1116 | "Parkinson\'s law of triviality".to_owned() 1117 | ) 1118 | ]] 1119 | ); 1120 | } 1121 | 1122 | #[test] 1123 | fn page_redirect_summary() { 1124 | let wikipedia = Wikipedia::<MockClient>::default(); 1125 | wikipedia 1126 | .client 1127 | .response 1128 | .lock() 1129 | .unwrap() 1130 | .push("{\"query\":{\"redirects\":[{\"to\":\"hello world\"}]}}".to_owned()); 1131 | wikipedia 1132 | .client 1133 | .response 1134 | .lock() 1135 | .unwrap() 1136 | 
.push("{\"query\":{\"pages\":{\"a\":{\"extract\":\"hello\"}}}}".to_owned()); 1137 | let page = wikipedia.page_from_title("Parkinson's law of triviality".to_owned()); 1138 | let summary = page.get_summary().unwrap(); 1139 | assert_eq!(summary, "hello".to_owned()); 1140 | assert_eq!( 1141 | *wikipedia.client.url.lock().unwrap(), 1142 | vec![ 1143 | "https://en.wikipedia.org/w/api.php".to_owned(), 1144 | "https://en.wikipedia.org/w/api.php".to_owned(), 1145 | ] 1146 | ); 1147 | assert_eq!( 1148 | *wikipedia.client.arguments.lock().unwrap(), 1149 | vec![ 1150 | vec![ 1151 | ("prop".to_owned(), "extracts".to_owned()), 1152 | ("explaintext".to_owned(), "".to_owned()), 1153 | ("exintro".to_owned(), "".to_owned()), 1154 | ("redirects".to_owned(), "".to_owned()), 1155 | ("format".to_owned(), "json".to_owned()), 1156 | ("action".to_owned(), "query".to_owned()), 1157 | ( 1158 | "titles".to_owned(), 1159 | "Parkinson\'s law of triviality".to_owned() 1160 | ) 1161 | ], 1162 | vec![ 1163 | ("prop".to_owned(), "extracts".to_owned()), 1164 | ("explaintext".to_owned(), "".to_owned()), 1165 | ("exintro".to_owned(), "".to_owned()), 1166 | ("redirects".to_owned(), "".to_owned()), 1167 | ("format".to_owned(), "json".to_owned()), 1168 | ("action".to_owned(), "query".to_owned()), 1169 | ("titles".to_owned(), "hello world".to_owned()) 1170 | ] 1171 | ] 1172 | ); 1173 | } 1174 | 1175 | #[test] 1176 | fn page_images() { 1177 | let wikipedia = Wikipedia::<MockClient>::default(); 1178 | wikipedia.client.response.lock().unwrap().push("{\"continue\": {\"lol\":\"1\"},\"query\":{\"pages\":{\"a\":{\"title\":\"Image 1\", \"imageinfo\":[{\"url\": \"http://example.com/image1.jpg\", \"descriptionurl\": \"http://example.com/image1.jpg.html\"}]}}}}".to_owned()); 1179 | wikipedia.client.response.lock().unwrap().push("{\"query\":{\"pages\":{\"a\":{\"title\":\"Image 2\", \"imageinfo\":[{\"url\": \"http://example.com/image2.jpg\", \"descriptionurl\": \"http://example.com/image2.jpg.html\"}]}}}}".to_owned()); 1180 | let page = wikipedia.page_from_title("Parkinson's law of triviality".to_owned()); 1181 | let images = page.get_images().unwrap().collect::<Vec<_>>(); 1182 | assert_eq!( 1183 | images, 1184 | vec![ 1185 | iter::Image { 1186 | url: "http://example.com/image1.jpg".to_owned(), 1187 | title: "Image 1".to_owned(), 1188 | description_url: "http://example.com/image1.jpg.html".to_owned(), 1189 | }, 1190 | iter::Image { 1191 | url: "http://example.com/image2.jpg".to_owned(), 1192 | title: "Image 2".to_owned(), 1193 | description_url: "http://example.com/image2.jpg.html".to_owned(), 1194 | } 1195 | ] 1196 | ); 1197 | assert_eq!( 1198 | *wikipedia.client.url.lock().unwrap(), 1199 | vec![ 1200 | "https://en.wikipedia.org/w/api.php".to_owned(), 1201 | "https://en.wikipedia.org/w/api.php".to_owned(), 1202 | ] 1203 | ); 1204 | assert_eq!( 1205 | *wikipedia.client.arguments.lock().unwrap(), 1206 | vec![ 1207 | vec![ 1208 | ("generator".to_owned(), "images".to_owned()), 1209 | ("gimlimit".to_owned(), "max".to_owned()), 1210 | ("prop".to_owned(), "imageinfo".to_owned()), 1211 | ("iiprop".to_owned(), "url".to_owned()), 1212 | ("format".to_owned(), "json".to_owned()), 1213 | ("action".to_owned(), "query".to_owned()), 1214 | ( 1215 | "titles".to_owned(), 1216 | "Parkinson\'s law of triviality".to_owned() 1217 | ), 1218 | ("continue".to_owned(), "".to_owned()) 1219 | ], 1220 | vec![ 1221 | ("generator".to_owned(), "images".to_owned()), 1222 | ("gimlimit".to_owned(), "max".to_owned()), 1223 | ("prop".to_owned(), "imageinfo".to_owned()), 1224 | 
("iiprop".to_owned(), "url".to_owned()), 1225 | ("format".to_owned(), "json".to_owned()), 1226 | ("action".to_owned(), "query".to_owned()), 1227 | ( 1228 | "titles".to_owned(), 1229 | "Parkinson\'s law of triviality".to_owned() 1230 | ), 1231 | ("lol".to_owned(), "1".to_owned()) 1232 | ] 1233 | ] 1234 | ); 1235 | } 1236 | 1237 | #[test] 1238 | fn page_coordinates() { 1239 | let wikipedia = Wikipedia::::default(); 1240 | wikipedia.client.response.lock().unwrap().push( 1241 | "{\"query\":{\"pages\":{\"a\":{\"coordinates\":[{\"lat\": 2.1, \"lon\":-1.3}]}}}}" 1242 | .to_owned(), 1243 | ); 1244 | let page = wikipedia.page_from_title("World".to_owned()); 1245 | let coordinates = page.get_coordinates().unwrap().unwrap(); 1246 | assert_eq!(coordinates, (2.1, -1.3)); 1247 | assert_eq!( 1248 | *wikipedia.client.url.lock().unwrap(), 1249 | vec!["https://en.wikipedia.org/w/api.php".to_owned()] 1250 | ); 1251 | assert_eq!( 1252 | *wikipedia.client.arguments.lock().unwrap(), 1253 | vec![vec![ 1254 | ("prop".to_owned(), "coordinates".to_owned()), 1255 | ("colimit".to_owned(), "max".to_owned()), 1256 | ("redirects".to_owned(), "".to_owned()), 1257 | ("format".to_owned(), "json".to_owned()), 1258 | ("action".to_owned(), "query".to_owned()), 1259 | ("titles".to_owned(), "World".to_owned()) 1260 | ]] 1261 | ); 1262 | } 1263 | 1264 | #[test] 1265 | fn page_no_coordinates() { 1266 | let wikipedia = Wikipedia::::default(); 1267 | wikipedia 1268 | .client 1269 | .response 1270 | .lock() 1271 | .unwrap() 1272 | .push("{\"query\":{\"pages\":{\"a\":{}}}}".to_owned()); 1273 | let page = wikipedia.page_from_title("World".to_owned()); 1274 | assert!(page.get_coordinates().unwrap().is_none()); 1275 | assert_eq!( 1276 | *wikipedia.client.url.lock().unwrap(), 1277 | vec!["https://en.wikipedia.org/w/api.php".to_owned()] 1278 | ); 1279 | assert_eq!( 1280 | *wikipedia.client.arguments.lock().unwrap(), 1281 | vec![vec![ 1282 | ("prop".to_owned(), "coordinates".to_owned()), 1283 | ("colimit".to_owned(), "max".to_owned()), 1284 | ("redirects".to_owned(), "".to_owned()), 1285 | ("format".to_owned(), "json".to_owned()), 1286 | ("action".to_owned(), "query".to_owned()), 1287 | ("titles".to_owned(), "World".to_owned()) 1288 | ]] 1289 | ); 1290 | } 1291 | 1292 | #[test] 1293 | fn get_references() { 1294 | let wikipedia = Wikipedia::::default(); 1295 | wikipedia.client.response.lock().unwrap().push("{\"continue\": {\"lol\":\"1\"},\"query\":{\"pages\":{\"a\":{\"extlinks\":[{\"*\": \"//example.com/reference1.html\"}]}}}}".to_owned()); 1296 | wikipedia.client.response.lock().unwrap().push("{\"query\":{\"pages\":{\"a\":{\"extlinks\":[{\"*\": \"//example.com/reference2.html\"}]}}}}".to_owned()); 1297 | let page = wikipedia.page_from_title("World".to_owned()); 1298 | assert_eq!( 1299 | page.get_references().unwrap().collect::>(), 1300 | vec![ 1301 | iter::Reference { 1302 | url: "http://example.com/reference1.html".to_owned(), 1303 | }, 1304 | iter::Reference { 1305 | url: "http://example.com/reference2.html".to_owned(), 1306 | } 1307 | ] 1308 | ); 1309 | assert_eq!( 1310 | *wikipedia.client.url.lock().unwrap(), 1311 | vec![ 1312 | "https://en.wikipedia.org/w/api.php".to_owned(), 1313 | "https://en.wikipedia.org/w/api.php".to_owned(), 1314 | ] 1315 | ); 1316 | assert_eq!( 1317 | *wikipedia.client.arguments.lock().unwrap(), 1318 | vec![ 1319 | vec![ 1320 | ("prop".to_owned(), "extlinks".to_owned()), 1321 | ("ellimit".to_owned(), "max".to_owned()), 1322 | ("format".to_owned(), "json".to_owned()), 1323 | ("action".to_owned(), 
"query".to_owned()), 1324 | ("titles".to_owned(), "World".to_owned()), 1325 | ("continue".to_owned(), "".to_owned()) 1326 | ], 1327 | vec![ 1328 | ("prop".to_owned(), "extlinks".to_owned()), 1329 | ("ellimit".to_owned(), "max".to_owned()), 1330 | ("format".to_owned(), "json".to_owned()), 1331 | ("action".to_owned(), "query".to_owned()), 1332 | ("titles".to_owned(), "World".to_owned()), 1333 | ("lol".to_owned(), "1".to_owned()) 1334 | ] 1335 | ] 1336 | ); 1337 | } 1338 | 1339 | #[test] 1340 | fn get_links() { 1341 | let wikipedia = Wikipedia::::default(); 1342 | wikipedia.client.response.lock().unwrap().push("{\"continue\": {\"lol\":\"1\"},\"query\":{\"pages\":{\"a\":{\"links\":[{\"title\": \"Hello\"}]}}}}".to_owned()); 1343 | wikipedia.client.response.lock().unwrap().push( 1344 | "{\"query\":{\"pages\":{\"a\":{\"links\":[{\"title\": \"World\"}]}}}}".to_owned(), 1345 | ); 1346 | let page = wikipedia.page_from_title("World".to_owned()); 1347 | assert_eq!( 1348 | page.get_links().unwrap().collect::>(), 1349 | vec![ 1350 | iter::Link { 1351 | title: "Hello".to_owned(), 1352 | }, 1353 | iter::Link { 1354 | title: "World".to_owned(), 1355 | } 1356 | ] 1357 | ); 1358 | assert_eq!( 1359 | *wikipedia.client.url.lock().unwrap(), 1360 | vec![ 1361 | "https://en.wikipedia.org/w/api.php".to_owned(), 1362 | "https://en.wikipedia.org/w/api.php".to_owned(), 1363 | ] 1364 | ); 1365 | assert_eq!( 1366 | *wikipedia.client.arguments.lock().unwrap(), 1367 | vec![ 1368 | vec![ 1369 | ("prop".to_owned(), "links".to_owned()), 1370 | ("plnamespace".to_owned(), "0".to_owned()), 1371 | ("ellimit".to_owned(), "max".to_owned()), 1372 | ("format".to_owned(), "json".to_owned()), 1373 | ("action".to_owned(), "query".to_owned()), 1374 | ("titles".to_owned(), "World".to_owned()), 1375 | ("continue".to_owned(), "".to_owned()), 1376 | ], 1377 | vec![ 1378 | ("prop".to_owned(), "links".to_owned()), 1379 | ("plnamespace".to_owned(), "0".to_owned()), 1380 | ("ellimit".to_owned(), "max".to_owned()), 1381 | ("format".to_owned(), "json".to_owned()), 1382 | ("action".to_owned(), "query".to_owned()), 1383 | ("titles".to_owned(), "World".to_owned()), 1384 | ("lol".to_owned(), "1".to_owned()), 1385 | ] 1386 | ] 1387 | ); 1388 | } 1389 | 1390 | #[test] 1391 | fn get_categories() { 1392 | let wikipedia = Wikipedia::::default(); 1393 | wikipedia.client.response.lock().unwrap().push("{\"continue\": {\"lol\":\"1\"},\"query\":{\"pages\":{\"a\":{\"categories\":[{\"title\": \"Hello\"}]}}}}".to_owned()); 1394 | wikipedia.client.response.lock().unwrap().push( 1395 | "{\"query\":{\"pages\":{\"a\":{\"categories\":[{\"title\": \"Category: World\"}]}}}}" 1396 | .to_owned(), 1397 | ); 1398 | let page = wikipedia.page_from_title("World".to_owned()); 1399 | assert_eq!( 1400 | page.get_categories().unwrap().collect::>(), 1401 | vec![ 1402 | iter::Category { 1403 | title: "Hello".to_owned(), 1404 | }, 1405 | iter::Category { 1406 | title: "World".to_owned(), 1407 | } 1408 | ] 1409 | ); 1410 | assert_eq!( 1411 | *wikipedia.client.url.lock().unwrap(), 1412 | vec![ 1413 | "https://en.wikipedia.org/w/api.php".to_owned(), 1414 | "https://en.wikipedia.org/w/api.php".to_owned(), 1415 | ] 1416 | ); 1417 | assert_eq!( 1418 | *wikipedia.client.arguments.lock().unwrap(), 1419 | vec![ 1420 | vec![ 1421 | ("prop".to_owned(), "categories".to_owned()), 1422 | ("cllimit".to_owned(), "max".to_owned()), 1423 | ("format".to_owned(), "json".to_owned()), 1424 | ("action".to_owned(), "query".to_owned()), 1425 | ("titles".to_owned(), "World".to_owned()), 1426 | 
("continue".to_owned(), "".to_owned()), 1427 | ], 1428 | vec![ 1429 | ("prop".to_owned(), "categories".to_owned()), 1430 | ("cllimit".to_owned(), "max".to_owned()), 1431 | ("format".to_owned(), "json".to_owned()), 1432 | ("action".to_owned(), "query".to_owned()), 1433 | ("titles".to_owned(), "World".to_owned()), 1434 | ("lol".to_owned(), "1".to_owned()), 1435 | ] 1436 | ] 1437 | ); 1438 | } 1439 | 1440 | #[test] 1441 | fn sections() { 1442 | let wikipedia = Wikipedia::::default(); 1443 | wikipedia.client.response.lock().unwrap().push( 1444 | "{\"parse\":{\"sections\":[{\"line\":\"hello\"}, {\"line\":\"world\"}]}}".to_owned(), 1445 | ); 1446 | let page = wikipedia.page_from_pageid("123".to_owned()); 1447 | assert_eq!( 1448 | page.get_sections().unwrap(), 1449 | vec!["hello".to_owned(), "world".to_owned()] 1450 | ); 1451 | assert_eq!( 1452 | *wikipedia.client.url.lock().unwrap(), 1453 | vec!["https://en.wikipedia.org/w/api.php".to_owned()] 1454 | ); 1455 | assert_eq!( 1456 | *wikipedia.client.arguments.lock().unwrap(), 1457 | vec![vec![ 1458 | ("prop".to_owned(), "sections".to_owned()), 1459 | ("format".to_owned(), "json".to_owned()), 1460 | ("action".to_owned(), "parse".to_owned()), 1461 | ("pageid".to_owned(), "123".to_owned()) 1462 | ]] 1463 | ); 1464 | } 1465 | 1466 | #[test] 1467 | fn languages() { 1468 | let wikipedia = Wikipedia::::default(); 1469 | wikipedia.client.response.lock().unwrap().push("{\"query\":{\"languages\":[{\"*\":\"hello\", \"code\":\"world\"}, {\"*\":\"foo\", \"code\":\"bar\"}]}}".to_owned()); 1470 | assert_eq!( 1471 | wikipedia.get_languages().unwrap(), 1472 | vec![ 1473 | ("world".to_owned(), "hello".to_owned()), 1474 | ("bar".to_owned(), "foo".to_owned()), 1475 | ] 1476 | ); 1477 | assert_eq!( 1478 | *wikipedia.client.url.lock().unwrap(), 1479 | vec!["https://en.wikipedia.org/w/api.php".to_owned()] 1480 | ); 1481 | assert_eq!( 1482 | *wikipedia.client.arguments.lock().unwrap(), 1483 | vec![vec![ 1484 | ("meta".to_owned(), "siteinfo".to_owned()), 1485 | ("siprop".to_owned(), "languages".to_owned()), 1486 | ("format".to_owned(), "json".to_owned()), 1487 | ("action".to_owned(), "query".to_owned()) 1488 | ]] 1489 | ); 1490 | } 1491 | } 1492 | --------------------------------------------------------------------------------