├── .gitignore ├── BayesCard ├── .DS_Store ├── Evaluation │ ├── .DS_Store │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-37.pyc │ │ ├── cardinality_estimation.cpython-37.pyc │ │ ├── parse_query_imdb.cpython-37.pyc │ │ └── utils.cpython-37.pyc │ ├── cardinality_estimation.py │ ├── generate_sql.py │ ├── parse_query_imdb.py │ └── utils.py ├── Models │ ├── .DS_Store │ ├── BN_single_model.py │ ├── Bayescard_BN.py │ ├── StatisticalTypes.py │ ├── __init__.py │ └── tools.py ├── Testing │ ├── .DS_Store │ ├── .gitignore │ ├── BN_testing.py │ ├── BN_training.py │ ├── __init__.py │ └── __pycache__ │ │ ├── BN_testing.cpython-37.pyc │ │ ├── BN_training.cpython-37.pyc │ │ └── __init__.cpython-37.pyc ├── __init__.py └── __pycache__ │ ├── __init__.cpython-37.pyc │ └── __init__.cpython-38.pyc ├── Evaluation ├── __init__.py ├── testing.py ├── training.py └── updating.py ├── IMDB-JOB ├── 10a.sql ├── 10b.sql ├── 10c.sql ├── 11a.sql ├── 11b.sql ├── 11c.sql ├── 11d.sql ├── 12a.sql ├── 12b.sql ├── 12c.sql ├── 13a.sql ├── 13b.sql ├── 13c.sql ├── 13d.sql ├── 14a.sql ├── 14b.sql ├── 14c.sql ├── 15a.sql ├── 15b.sql ├── 15c.sql ├── 15d.sql ├── 16a.sql ├── 16b.sql ├── 16c.sql ├── 16d.sql ├── 17a.sql ├── 17b.sql ├── 17c.sql ├── 17d.sql ├── 17e.sql ├── 17f.sql ├── 18a.sql ├── 18b.sql ├── 18c.sql ├── 19a.sql ├── 19b.sql ├── 19c.sql ├── 19d.sql ├── 1a.sql ├── 1b.sql ├── 1c.sql ├── 1d.sql ├── 20a.sql ├── 20b.sql ├── 20c.sql ├── 21a.sql ├── 21b.sql ├── 21c.sql ├── 22a.sql ├── 22b.sql ├── 22c.sql ├── 22d.sql ├── 23a.sql ├── 23b.sql ├── 23c.sql ├── 24a.sql ├── 24b.sql ├── 25a.sql ├── 25b.sql ├── 25c.sql ├── 26a.sql ├── 26b.sql ├── 26c.sql ├── 27a.sql ├── 27b.sql ├── 27c.sql ├── 28a.sql ├── 28b.sql ├── 28c.sql ├── 29a.sql ├── 29b.sql ├── 29c.sql ├── 2a.sql ├── 2b.sql ├── 2c.sql ├── 2d.sql ├── 30a.sql ├── 30b.sql ├── 30c.sql ├── 31a.sql ├── 31b.sql ├── 31c.sql ├── 32a.sql ├── 32b.sql ├── 33a.sql ├── 33b.sql ├── 33c.sql ├── 3a.sql ├── 3b.sql ├── 3c.sql ├── 4a.sql ├── 4b.sql ├── 4c.sql ├── 
5a.sql ├── 5b.sql ├── 5c.sql ├── 6a.sql ├── 6b.sql ├── 6c.sql ├── 6d.sql ├── 6e.sql ├── 6f.sql ├── 7a.sql ├── 7b.sql ├── 7c.sql ├── 8a.sql ├── 8b.sql ├── 8c.sql ├── 8d.sql ├── 9a.sql ├── 9b.sql ├── 9c.sql ├── 9d.sql ├── all_queries.sql ├── all_queries_original.sql ├── job_sub_plan_queries.txt └── temp.sql ├── Join_scheme ├── .DS_Store ├── __init__.py ├── binning.py ├── bound.py ├── data_prepare.py ├── factor.py ├── join_graph.py └── tools.py ├── Pgmpy ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-37.pyc │ └── global_vars.cpython-37.pyc ├── base │ ├── DAG.py │ ├── UndirectedGraph.py │ ├── __init__.py │ └── __pycache__ │ │ ├── DAG.cpython-37.pyc │ │ ├── UndirectedGraph.cpython-37.pyc │ │ └── __init__.cpython-37.pyc ├── data │ ├── Data.py │ ├── __init__.py │ └── __pycache__ │ │ ├── Data.cpython-37.pyc │ │ └── __init__.cpython-37.pyc ├── estimators │ ├── BayesianEstimator.py │ ├── CITests.py │ ├── MLE.py │ ├── __init__.py │ ├── __pycache__ │ │ ├── BayesianEstimator.cpython-37.pyc │ │ ├── CITests.cpython-37.pyc │ │ ├── MLE.cpython-37.pyc │ │ ├── __init__.cpython-37.pyc │ │ └── base.cpython-37.pyc │ └── base.py ├── extern │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-37.pyc │ │ └── tabulate.cpython-37.pyc │ └── tabulate.py ├── factors │ ├── FactorSet.py │ ├── __init__.py │ ├── __pycache__ │ │ ├── FactorSet.cpython-37.pyc │ │ ├── __init__.cpython-37.pyc │ │ └── base.cpython-37.pyc │ ├── base.py │ ├── continuous │ │ ├── ContinuousFactor.py │ │ ├── LinearGaussianCPD.py │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── ContinuousFactor.cpython-37.pyc │ │ │ ├── LinearGaussianCPD.cpython-37.pyc │ │ │ ├── __init__.cpython-37.pyc │ │ │ └── discretize.cpython-37.pyc │ │ └── discretize.py │ ├── discrete │ │ ├── CPD.py │ │ ├── DiscreteFactor.py │ │ ├── JointProbabilityDistribution.py │ │ ├── __init__.py │ │ └── __pycache__ │ │ │ ├── CPD.cpython-37.pyc │ │ │ ├── DiscreteFactor.cpython-37.pyc │ │ │ ├── JointProbabilityDistribution.cpython-37.pyc │ │ │ └── 
__init__.cpython-37.pyc │ └── distributions │ │ ├── CanonicalDistribution.py │ │ ├── CustomDistribution.py │ │ ├── GaussianDistribution.py │ │ ├── __init__.py │ │ ├── __pycache__ │ │ ├── CanonicalDistribution.cpython-37.pyc │ │ ├── CustomDistribution.cpython-37.pyc │ │ ├── GaussianDistribution.cpython-37.pyc │ │ ├── __init__.cpython-37.pyc │ │ └── base.cpython-37.pyc │ │ └── base.py ├── global_vars.py ├── independencies │ ├── Independencies.py │ ├── __init__.py │ └── __pycache__ │ │ ├── Independencies.cpython-37.pyc │ │ └── __init__.cpython-37.pyc ├── inference │ ├── CausalInference.py │ ├── EliminationOrder.py │ ├── ExactInference.py │ ├── ExactInferenceTorch.py │ ├── __init__.py │ ├── __pycache__ │ │ ├── EliminationOrder.cpython-37.pyc │ │ ├── ExactInference.cpython-37.pyc │ │ ├── ExactInferenceTorch.cpython-37.pyc │ │ ├── __init__.cpython-37.pyc │ │ └── base.cpython-37.pyc │ └── base.py ├── models │ ├── BayesianModel.py │ ├── ClusterGraph.py │ ├── JunctionTree.py │ ├── LinearGaussianBayesianNetwork.py │ ├── MarkovModel.py │ ├── __init__.py │ └── __pycache__ │ │ ├── BayesianModel.cpython-37.pyc │ │ ├── ClusterGraph.cpython-37.pyc │ │ ├── JunctionTree.cpython-37.pyc │ │ ├── LinearGaussianBayesianNetwork.cpython-37.pyc │ │ ├── MarkovModel.cpython-37.pyc │ │ └── __init__.cpython-37.pyc ├── readwrite │ ├── BIF.py │ ├── PomdpX.py │ ├── UAI.py │ ├── XMLBIF.py │ ├── XMLBeliefNetwork.py │ ├── __init__.py │ └── __pycache__ │ │ ├── BIF.cpython-37.pyc │ │ ├── PomdpX.cpython-37.pyc │ │ ├── UAI.cpython-37.pyc │ │ ├── XMLBIF.cpython-37.pyc │ │ ├── XMLBeliefNetwork.cpython-37.pyc │ │ └── __init__.cpython-37.pyc ├── sampling │ ├── HMC.py │ ├── NUTS.py │ ├── Sampling.py │ ├── __init__.py │ └── base.py └── utils │ ├── __init__.py │ ├── __pycache__ │ ├── __init__.cpython-37.pyc │ ├── check_functions.cpython-37.pyc │ ├── decorators.cpython-37.pyc │ ├── mathext.cpython-37.pyc │ ├── optimizer.cpython-37.pyc │ └── state_name.cpython-37.pyc │ ├── check_functions.py │ ├── decorators.py │ 
├── mathext.py │ ├── optimizer.py │ ├── sets.py │ └── state_name.py ├── README.md ├── Sampling ├── __init__.py ├── create_binned_cols.py ├── get_query_binned_cards.py ├── load_sample.py ├── sample_on_the_fly.py └── utils │ ├── __init__.py │ ├── parse_sql.py │ ├── query_storage.py │ └── utils.py ├── Schemas ├── .DS_Store ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-37.pyc │ ├── __init__.cpython-38.pyc │ ├── graph_representation.cpython-37.pyc │ └── graph_representation.cpython-38.pyc ├── graph_representation.py ├── imdb │ ├── __init__.py │ └── schema.py ├── ssb │ ├── __init__.py │ └── schema.py └── stats │ ├── __init__.py │ ├── __pycache__ │ ├── __init__.cpython-37.pyc │ ├── __init__.cpython-38.pyc │ ├── schema.cpython-37.pyc │ └── schema.cpython-38.pyc │ └── schema.py ├── checkpoints ├── binned_cards_1.0 │ ├── 10a.pkl │ ├── 10b.pkl │ ├── 10c.pkl │ ├── 11a.pkl │ ├── 11b.pkl │ ├── 11c.pkl │ ├── 11d.pkl │ ├── 12a.pkl │ ├── 12b.pkl │ ├── 12c.pkl │ ├── 13a.pkl │ ├── 13b.pkl │ ├── 13c.pkl │ ├── 13d.pkl │ ├── 14a.pkl │ ├── 14b.pkl │ ├── 14c.pkl │ ├── 15a.pkl │ ├── 15b.pkl │ ├── 15c.pkl │ ├── 15d.pkl │ ├── 16a.pkl │ ├── 16b.pkl │ ├── 16c.pkl │ ├── 16d.pkl │ ├── 17a.pkl │ ├── 17b.pkl │ ├── 17c.pkl │ ├── 17d.pkl │ ├── 17e.pkl │ ├── 17f.pkl │ ├── 18a.pkl │ ├── 18b.pkl │ ├── 18c.pkl │ ├── 19a.pkl │ ├── 19b.pkl │ ├── 19c.pkl │ ├── 19d.pkl │ ├── 1a.pkl │ ├── 1b.pkl │ ├── 1c.pkl │ ├── 1d.pkl │ ├── 20a.pkl │ ├── 20b.pkl │ ├── 20c.pkl │ ├── 21a.pkl │ ├── 21b.pkl │ ├── 21c.pkl │ ├── 22a.pkl │ ├── 22b.pkl │ ├── 22c.pkl │ ├── 22d.pkl │ ├── 23a.pkl │ ├── 23b.pkl │ ├── 23c.pkl │ ├── 24a.pkl │ ├── 24b.pkl │ ├── 25a.pkl │ ├── 25b.pkl │ ├── 25c.pkl │ ├── 26a.pkl │ ├── 26b.pkl │ ├── 26c.pkl │ ├── 27a.pkl │ ├── 27b.pkl │ ├── 27c.pkl │ ├── 28a.pkl │ ├── 28b.pkl │ ├── 28c.pkl │ ├── 29a.pkl │ ├── 29b.pkl │ ├── 29c.pkl │ ├── 2a.pkl │ ├── 2b.pkl │ ├── 2c.pkl │ ├── 2d.pkl │ ├── 30a.pkl │ ├── 30b.pkl │ ├── 30c.pkl │ ├── 31a.pkl │ ├── 31b.pkl │ ├── 31c.pkl │ ├── 32a.pkl │ ├── 32b.pkl │ ├── 
33a.pkl │ ├── 33b.pkl │ ├── 33c.pkl │ ├── 3a.pkl │ ├── 3b.pkl │ ├── 3c.pkl │ ├── 4a.pkl │ ├── 4b.pkl │ ├── 4c.pkl │ ├── 5a.pkl │ ├── 5b.pkl │ ├── 5c.pkl │ ├── 6a.pkl │ ├── 6b.pkl │ ├── 6c.pkl │ ├── 6d.pkl │ ├── 6e.pkl │ ├── 6f.pkl │ ├── 7a.pkl │ ├── 7b.pkl │ ├── 7c.pkl │ ├── 8a.pkl │ ├── 8b.pkl │ ├── 8c.pkl │ ├── 8d.pkl │ ├── 9a.pkl │ ├── 9b.pkl │ ├── 9c.pkl │ └── 9d.pkl ├── derived_query_file.pkl ├── gt_no_filter.pkl └── stats_CEB_query.sql ├── requirements.txt ├── run_experiment.py └── send_query.py /.gitignore: -------------------------------------------------------------------------------- 1 | .vscode 2 | .idea 3 | 4 | __pycache__ 5 | *.pyc 6 | .mypy_cache 7 | brad.egg-info 8 | 9 | .DS_Store 10 | ._.DS_Store 11 | *.swp 12 | 13 | *.ipynb 14 | *.xml 15 | 16 | Debug -------------------------------------------------------------------------------- /BayesCard/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/BayesCard/.DS_Store -------------------------------------------------------------------------------- /BayesCard/Evaluation/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/BayesCard/Evaluation/.DS_Store -------------------------------------------------------------------------------- /BayesCard/Evaluation/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/BayesCard/Evaluation/__init__.py -------------------------------------------------------------------------------- /BayesCard/Evaluation/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/BayesCard/Evaluation/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /BayesCard/Evaluation/__pycache__/cardinality_estimation.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/BayesCard/Evaluation/__pycache__/cardinality_estimation.cpython-37.pyc -------------------------------------------------------------------------------- /BayesCard/Evaluation/__pycache__/parse_query_imdb.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/BayesCard/Evaluation/__pycache__/parse_query_imdb.cpython-37.pyc -------------------------------------------------------------------------------- /BayesCard/Evaluation/__pycache__/utils.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/BayesCard/Evaluation/__pycache__/utils.cpython-37.pyc -------------------------------------------------------------------------------- /BayesCard/Models/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/BayesCard/Models/.DS_Store -------------------------------------------------------------------------------- /BayesCard/Models/StatisticalTypes.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | import logging 3 | 4 | logger = logging.getLogger(__name__) 5 | 6 | 7 | class MetaType(Enum): 8 | REAL = 1 9 | BINARY = 2 10 | DISCRETE = 3 11 | 12 | 
13 | class Type(Enum): 14 | REAL = (1, MetaType.REAL) 15 | INTERVAL = (2, MetaType.REAL) 16 | POSITIVE = (3, MetaType.REAL) 17 | CATEGORICAL = (4, MetaType.DISCRETE) 18 | ORDINAL = (5, MetaType.DISCRETE) 19 | COUNT = (6, MetaType.DISCRETE) 20 | BINARY = (7, MetaType.BINARY) 21 | 22 | def __init__(self, enum_val, meta_type): 23 | self._enum_val = enum_val 24 | self._meta_type = meta_type 25 | 26 | @property 27 | def meta_type(self): 28 | return self._meta_type 29 | 30 | 31 | META_TYPE_MAP = { 32 | MetaType.REAL: [Type.REAL, Type.INTERVAL, Type.POSITIVE], 33 | MetaType.BINARY: [Type.BINARY], 34 | MetaType.DISCRETE: [Type.CATEGORICAL, Type.ORDINAL, Type.COUNT], 35 | } -------------------------------------------------------------------------------- /BayesCard/Models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/BayesCard/Models/__init__.py -------------------------------------------------------------------------------- /BayesCard/Testing/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/BayesCard/Testing/.DS_Store -------------------------------------------------------------------------------- /BayesCard/Testing/.gitignore: -------------------------------------------------------------------------------- 1 | check_points/ -------------------------------------------------------------------------------- /BayesCard/Testing/BN_training.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import pickle 3 | from Join_scheme.data_prepare import process_stats_data 4 | from BayesCard.Models.Bayescard_BN import Bayescard_BN 5 | 6 | def train_DMV(csv_path, model_path, algorithm, max_parents, sample_size): 7 | data = pd.read_csv(csv_path) 8 | 
new_cols = [] 9 | #removing unuseful columns 10 | for col in data.columns: 11 | if col in ['VIN', 'Zip', 'City', 'Make', 'Unladen Weight', 'Maximum Gross Weight', 'Passengers', 12 | 'Reg Valid Date', 'Reg Expiration Date', 'Color']: 13 | data = data.drop(col, axis=1) 14 | else: 15 | new_cols.append(col.replace(" ", "_")) 16 | data.columns = new_cols 17 | BN = Bayescard_BN('dmv') 18 | BN.build_from_data(data, algorithm=algorithm, max_parents=max_parents, ignore_cols=['id'], sample_size=sample_size) 19 | model_path += f"/{algorithm}_{max_parents}.pkl" 20 | pickle.dump(BN, open(model_path, 'wb'), pickle.HIGHEST_PROTOCOL) 21 | print(f"model saved at {model_path}") 22 | return None 23 | 24 | def train_Census(csv_path, model_path, algorithm, max_parents, sample_size): 25 | df = pd.read_csv(csv_path, header=0, sep=",") 26 | df = df.drop("caseid", axis=1) 27 | df = df.dropna(axis=0) 28 | BN = Bayescard_BN('Census') 29 | BN.build_from_data(df, algorithm=algorithm, max_parents=max_parents, ignore_cols=['id'], sample_size=sample_size) 30 | model_path += f"/{algorithm}_{max_parents}.pkl" 31 | pickle.dump(BN, open(model_path, 'wb'), pickle.HIGHEST_PROTOCOL) 32 | print(f"model saved at {model_path}") 33 | return None 34 | 35 | 36 | def train_stats(data_path, model_folder, n_bins=500, save_bucket_bins=False): 37 | data, null_values, key_attrs, all_bin_modes = process_stats_data(data_path, model_folder, n_bins, save_bucket_bins) 38 | for table in data: 39 | print(f"training BayesCard on table {table}") 40 | bn = Bayescard_BN(table, key_attrs[table], null_values=null_values[table]) 41 | bn.build_from_data(data[table]) 42 | model_path = model_folder + f"/{table}.pkl" 43 | pickle.dump(bn, open(model_path, 'wb'), pickle.HIGHEST_PROTOCOL) 44 | print(f"model saved at {model_path}") 45 | 46 | -------------------------------------------------------------------------------- /BayesCard/Testing/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/BayesCard/Testing/__init__.py -------------------------------------------------------------------------------- /BayesCard/Testing/__pycache__/BN_testing.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/BayesCard/Testing/__pycache__/BN_testing.cpython-37.pyc -------------------------------------------------------------------------------- /BayesCard/Testing/__pycache__/BN_training.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/BayesCard/Testing/__pycache__/BN_training.cpython-37.pyc -------------------------------------------------------------------------------- /BayesCard/Testing/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/BayesCard/Testing/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /BayesCard/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/BayesCard/__init__.py -------------------------------------------------------------------------------- /BayesCard/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/BayesCard/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- 
/BayesCard/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/BayesCard/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /Evaluation/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/Evaluation/__init__.py -------------------------------------------------------------------------------- /IMDB-JOB/10a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(chn.name) AS uncredited_voiced_character, MIN(t.title) AS russian_movie FROM char_name AS chn, cast_info AS ci, company_name AS cn, company_type AS ct, movie_companies AS mc, role_type AS rt, title AS t WHERE ci.note like '%(voice)%' and ci.note like '%(uncredited)%' AND cn.country_code = '[ru]' AND rt.role = 'actor' AND t.production_year > 2005 AND t.id = mc.movie_id AND t.id = ci.movie_id AND ci.movie_id = mc.movie_id AND chn.id = ci.person_role_id AND rt.id = ci.role_id AND cn.id = mc.company_id AND ct.id = mc.company_type_id; 2 | -------------------------------------------------------------------------------- /IMDB-JOB/10b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(chn.name) AS character, MIN(t.title) AS russian_mov_with_actor_producer FROM char_name AS chn, cast_info AS ci, company_name AS cn, company_type AS ct, movie_companies AS mc, role_type AS rt, title AS t WHERE ci.note like '%(producer)%' AND cn.country_code = '[ru]' AND rt.role = 'actor' AND t.production_year > 2010 AND t.id = mc.movie_id AND t.id = ci.movie_id AND ci.movie_id = mc.movie_id AND chn.id = ci.person_role_id AND rt.id = ci.role_id AND cn.id = mc.company_id AND ct.id = 
mc.company_type_id; 2 | -------------------------------------------------------------------------------- /IMDB-JOB/10c.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(chn.name) AS character, MIN(t.title) AS movie_with_american_producer FROM char_name AS chn, cast_info AS ci, company_name AS cn, company_type AS ct, movie_companies AS mc, role_type AS rt, title AS t WHERE ci.note like '%(producer)%' AND cn.country_code = '[us]' AND t.production_year > 1990 AND t.id = mc.movie_id AND t.id = ci.movie_id AND ci.movie_id = mc.movie_id AND chn.id = ci.person_role_id AND rt.id = ci.role_id AND cn.id = mc.company_id AND ct.id = mc.company_type_id; 2 | -------------------------------------------------------------------------------- /IMDB-JOB/11a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(cn.name) AS from_company, MIN(lt.link) AS movie_link_type, MIN(t.title) AS non_polish_sequel_movie FROM company_name AS cn, company_type AS ct, keyword AS k, link_type AS lt, movie_companies AS mc, movie_keyword AS mk, movie_link AS ml, title AS t WHERE cn.country_code !='[pl]' AND (cn.name LIKE '%Film%' OR cn.name LIKE '%Warner%') AND ct.kind ='production companies' AND k.keyword ='sequel' AND lt.link LIKE '%follow%' AND mc.note IS NULL AND t.production_year BETWEEN 1950 AND 2000 AND lt.id = ml.link_type_id AND ml.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND t.id = mc.movie_id AND mc.company_type_id = ct.id AND mc.company_id = cn.id AND ml.movie_id = mk.movie_id AND ml.movie_id = mc.movie_id AND mk.movie_id = mc.movie_id; 2 | -------------------------------------------------------------------------------- /IMDB-JOB/11b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(cn.name) AS from_company, MIN(lt.link) AS movie_link_type, MIN(t.title) AS sequel_movie FROM company_name AS cn, company_type AS ct, keyword AS 
k, link_type AS lt, movie_companies AS mc, movie_keyword AS mk, movie_link AS ml, title AS t WHERE cn.country_code !='[pl]' AND (cn.name LIKE '%Film%' OR cn.name LIKE '%Warner%') AND ct.kind ='production companies' AND k.keyword ='sequel' AND lt.link LIKE '%follows%' AND mc.note IS NULL AND t.production_year = 1998 and t.title like '%Money%' AND lt.id = ml.link_type_id AND ml.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND t.id = mc.movie_id AND mc.company_type_id = ct.id AND mc.company_id = cn.id AND ml.movie_id = mk.movie_id AND ml.movie_id = mc.movie_id AND mk.movie_id = mc.movie_id; 2 | -------------------------------------------------------------------------------- /IMDB-JOB/11c.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(cn.name) AS from_company, MIN(mc.note) AS production_note, MIN(t.title) AS movie_based_on_book FROM company_name AS cn, company_type AS ct, keyword AS k, link_type AS lt, movie_companies AS mc, movie_keyword AS mk, movie_link AS ml, title AS t WHERE cn.country_code !='[pl]' and (cn.name like '20th Century Fox%' or cn.name like 'Twentieth Century Fox%') AND ct.kind != 'production companies' and ct.kind is not NULL AND k.keyword in ('sequel', 'revenge', 'based-on-novel') AND mc.note is not NULL AND t.production_year > 1950 AND lt.id = ml.link_type_id AND ml.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND t.id = mc.movie_id AND mc.company_type_id = ct.id AND mc.company_id = cn.id AND ml.movie_id = mk.movie_id AND ml.movie_id = mc.movie_id AND mk.movie_id = mc.movie_id; 2 | -------------------------------------------------------------------------------- /IMDB-JOB/11d.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(cn.name) AS from_company, MIN(mc.note) AS production_note, MIN(t.title) AS movie_based_on_book FROM company_name AS cn, company_type AS ct, keyword AS k, link_type AS lt, movie_companies 
AS mc, movie_keyword AS mk, movie_link AS ml, title AS t WHERE cn.country_code !='[pl]' AND ct.kind != 'production companies' and ct.kind is not NULL AND k.keyword in ('sequel', 'revenge', 'based-on-novel') AND mc.note is not NULL AND t.production_year > 1950 AND lt.id = ml.link_type_id AND ml.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND t.id = mc.movie_id AND mc.company_type_id = ct.id AND mc.company_id = cn.id AND ml.movie_id = mk.movie_id AND ml.movie_id = mc.movie_id AND mk.movie_id = mc.movie_id; 2 | -------------------------------------------------------------------------------- /IMDB-JOB/12a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(cn.name) AS movie_company, MIN(mi_idx.info) AS rating, MIN(t.title) AS drama_horror_movie FROM company_name AS cn, company_type AS ct, info_type AS it1, info_type AS it2, movie_companies AS mc, movie_info AS mi, movie_info_idx AS mi_idx, title AS t WHERE cn.country_code = '[us]' AND ct.kind = 'production companies' AND it1.info = 'genres' AND it2.info = 'rating' AND mi.info in ('Drama', 'Horror') AND mi_idx.info > '8.0' AND t.production_year between 2005 and 2008 AND t.id = mi.movie_id AND t.id = mi_idx.movie_id AND mi.info_type_id = it1.id AND mi_idx.info_type_id = it2.id AND t.id = mc.movie_id AND ct.id = mc.company_type_id AND cn.id = mc.company_id AND mc.movie_id = mi.movie_id AND mc.movie_id = mi_idx.movie_id AND mi.movie_id = mi_idx.movie_id; 2 | -------------------------------------------------------------------------------- /IMDB-JOB/12b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(mi.info) AS budget, MIN(t.title) AS unsuccsessful_movie FROM company_name AS cn, company_type AS ct, info_type AS it1, info_type AS it2, movie_companies AS mc, movie_info AS mi, movie_info_idx AS mi_idx, title AS t WHERE cn.country_code ='[us]' AND ct.kind is not NULL and (ct.kind ='production companies' or 
ct.kind = 'distributors') AND it1.info ='budget' AND it2.info ='bottom 10 rank' AND t.production_year >2000 AND (t.title LIKE 'Birdemic%' OR t.title LIKE '%Movie%') AND t.id = mi.movie_id AND t.id = mi_idx.movie_id AND mi.info_type_id = it1.id AND mi_idx.info_type_id = it2.id AND t.id = mc.movie_id AND ct.id = mc.company_type_id AND cn.id = mc.company_id AND mc.movie_id = mi.movie_id AND mc.movie_id = mi_idx.movie_id AND mi.movie_id = mi_idx.movie_id; 2 | -------------------------------------------------------------------------------- /IMDB-JOB/12c.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(cn.name) AS movie_company, MIN(mi_idx.info) AS rating, MIN(t.title) AS mainstream_movie FROM company_name AS cn, company_type AS ct, info_type AS it1, info_type AS it2, movie_companies AS mc, movie_info AS mi, movie_info_idx AS mi_idx, title AS t WHERE cn.country_code = '[us]' AND ct.kind = 'production companies' AND it1.info = 'genres' AND it2.info = 'rating' AND mi.info in ('Drama', 'Horror', 'Western', 'Family') AND mi_idx.info > '7.0' AND t.production_year between 2000 and 2010 AND t.id = mi.movie_id AND t.id = mi_idx.movie_id AND mi.info_type_id = it1.id AND mi_idx.info_type_id = it2.id AND t.id = mc.movie_id AND ct.id = mc.company_type_id AND cn.id = mc.company_id AND mc.movie_id = mi.movie_id AND mc.movie_id = mi_idx.movie_id AND mi.movie_id = mi_idx.movie_id; 2 | -------------------------------------------------------------------------------- /IMDB-JOB/13a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(mi.info) AS release_date, MIN(miidx.info) AS rating, MIN(t.title) AS german_movie FROM company_name AS cn, company_type AS ct, info_type AS it, info_type AS it2, kind_type AS kt, movie_companies AS mc, movie_info AS mi, movie_info_idx AS miidx, title AS t WHERE cn.country_code ='[de]' AND ct.kind ='production companies' AND it.info ='rating' AND it2.info 
='release dates' AND kt.kind ='movie' AND mi.movie_id = t.id AND it2.id = mi.info_type_id AND kt.id = t.kind_id AND mc.movie_id = t.id AND cn.id = mc.company_id AND ct.id = mc.company_type_id AND miidx.movie_id = t.id AND it.id = miidx.info_type_id AND mi.movie_id = miidx.movie_id AND mi.movie_id = mc.movie_id AND miidx.movie_id = mc.movie_id; 2 | -------------------------------------------------------------------------------- /IMDB-JOB/13b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(cn.name) AS producing_company, MIN(miidx.info) AS rating, MIN(t.title) AS movie_about_winning FROM company_name AS cn, company_type AS ct, info_type AS it, info_type AS it2, kind_type AS kt, movie_companies AS mc, movie_info AS mi, movie_info_idx AS miidx, title AS t WHERE cn.country_code ='[us]' AND ct.kind ='production companies' AND it.info ='rating' AND it2.info ='release dates' AND kt.kind ='movie' AND t.title != '' AND (t.title LIKE '%Champion%' OR t.title LIKE '%Loser%') AND mi.movie_id = t.id AND it2.id = mi.info_type_id AND kt.id = t.kind_id AND mc.movie_id = t.id AND cn.id = mc.company_id AND ct.id = mc.company_type_id AND miidx.movie_id = t.id AND it.id = miidx.info_type_id AND mi.movie_id = miidx.movie_id AND mi.movie_id = mc.movie_id AND miidx.movie_id = mc.movie_id; 2 | -------------------------------------------------------------------------------- /IMDB-JOB/13c.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(cn.name) AS producing_company, MIN(miidx.info) AS rating, MIN(t.title) AS movie_about_winning FROM company_name AS cn, company_type AS ct, info_type AS it, info_type AS it2, kind_type AS kt, movie_companies AS mc, movie_info AS mi, movie_info_idx AS miidx, title AS t WHERE cn.country_code ='[us]' AND ct.kind ='production companies' AND it.info ='rating' AND it2.info ='release dates' AND kt.kind ='movie' AND t.title != '' AND (t.title LIKE 'Champion%' OR 
t.title LIKE 'Loser%') AND mi.movie_id = t.id AND it2.id = mi.info_type_id AND kt.id = t.kind_id AND mc.movie_id = t.id AND cn.id = mc.company_id AND ct.id = mc.company_type_id AND miidx.movie_id = t.id AND it.id = miidx.info_type_id AND mi.movie_id = miidx.movie_id AND mi.movie_id = mc.movie_id AND miidx.movie_id = mc.movie_id; 2 | -------------------------------------------------------------------------------- /IMDB-JOB/13d.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(cn.name) AS producing_company, MIN(miidx.info) AS rating, MIN(t.title) AS movie FROM company_name AS cn, company_type AS ct, info_type AS it, info_type AS it2, kind_type AS kt, movie_companies AS mc, movie_info AS mi, movie_info_idx AS miidx, title AS t WHERE cn.country_code ='[us]' AND ct.kind ='production companies' AND it.info ='rating' AND it2.info ='release dates' AND kt.kind ='movie' AND mi.movie_id = t.id AND it2.id = mi.info_type_id AND kt.id = t.kind_id AND mc.movie_id = t.id AND cn.id = mc.company_id AND ct.id = mc.company_type_id AND miidx.movie_id = t.id AND it.id = miidx.info_type_id AND mi.movie_id = miidx.movie_id AND mi.movie_id = mc.movie_id AND miidx.movie_id = mc.movie_id; 2 | -------------------------------------------------------------------------------- /IMDB-JOB/14a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(mi_idx.info) AS rating, MIN(t.title) AS northern_dark_movie FROM info_type AS it1, info_type AS it2, keyword AS k, kind_type AS kt, movie_info AS mi, movie_info_idx AS mi_idx, movie_keyword AS mk, title AS t WHERE it1.info = 'countries' AND it2.info = 'rating' AND k.keyword in ('murder', 'murder-in-title', 'blood', 'violence') AND kt.kind = 'movie' AND mi.info IN ('Sweden', 'Norway', 'Germany', 'Denmark', 'Swedish', 'Denish', 'Norwegian', 'German', 'USA', 'American') AND mi_idx.info < '8.5' AND t.production_year > 2010 AND kt.id = t.kind_id AND t.id = 
mi.movie_id AND t.id = mk.movie_id AND t.id = mi_idx.movie_id AND mk.movie_id = mi.movie_id AND mk.movie_id = mi_idx.movie_id AND mi.movie_id = mi_idx.movie_id AND k.id = mk.keyword_id AND it1.id = mi.info_type_id AND it2.id = mi_idx.info_type_id; 2 | -------------------------------------------------------------------------------- /IMDB-JOB/14b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(mi_idx.info) AS rating, MIN(t.title) AS western_dark_production FROM info_type AS it1, info_type AS it2, keyword AS k, kind_type AS kt, movie_info AS mi, movie_info_idx AS mi_idx, movie_keyword AS mk, title AS t WHERE it1.info = 'countries' AND it2.info = 'rating' AND k.keyword in ('murder', 'murder-in-title') AND kt.kind = 'movie' AND mi.info IN ('Sweden', 'Norway', 'Germany', 'Denmark', 'Swedish', 'Denish', 'Norwegian', 'German', 'USA', 'American') AND mi_idx.info > '6.0' AND t.production_year > 2010 and (t.title like '%murder%' or t.title like '%Murder%' or t.title like '%Mord%') AND kt.id = t.kind_id AND t.id = mi.movie_id AND t.id = mk.movie_id AND t.id = mi_idx.movie_id AND mk.movie_id = mi.movie_id AND mk.movie_id = mi_idx.movie_id AND mi.movie_id = mi_idx.movie_id AND k.id = mk.keyword_id AND it1.id = mi.info_type_id AND it2.id = mi_idx.info_type_id; 2 | -------------------------------------------------------------------------------- /IMDB-JOB/14c.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(mi_idx.info) AS rating, MIN(t.title) AS north_european_dark_production FROM info_type AS it1, info_type AS it2, keyword AS k, kind_type AS kt, movie_info AS mi, movie_info_idx AS mi_idx, movie_keyword AS mk, title AS t WHERE it1.info = 'countries' AND it2.info = 'rating' AND k.keyword is not null and k.keyword in ('murder', 'murder-in-title', 'blood', 'violence') AND kt.kind in ('movie', 'episode') AND mi.info IN ('Sweden', 'Norway', 'Germany', 'Denmark', 'Swedish', 'Danish', 
'Norwegian', 'German', 'USA', 'American') AND mi_idx.info < '8.5' AND t.production_year > 2005 AND kt.id = t.kind_id AND t.id = mi.movie_id AND t.id = mk.movie_id AND t.id = mi_idx.movie_id AND mk.movie_id = mi.movie_id AND mk.movie_id = mi_idx.movie_id AND mi.movie_id = mi_idx.movie_id AND k.id = mk.keyword_id AND it1.id = mi.info_type_id AND it2.id = mi_idx.info_type_id; 2 | -------------------------------------------------------------------------------- /IMDB-JOB/15a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(mi.info) AS release_date, MIN(t.title) AS internet_movie FROM aka_title AS at, company_name AS cn, company_type AS ct, info_type AS it1, keyword AS k, movie_companies AS mc, movie_info AS mi, movie_keyword AS mk, title AS t WHERE cn.country_code = '[us]' AND it1.info = 'release dates' AND mc.note like '%(200%)%' and mc.note like '%(worldwide)%' AND mi.note like '%internet%' AND mi.info like 'USA:% 200%' AND t.production_year > 2000 AND t.id = at.movie_id AND t.id = mi.movie_id AND t.id = mk.movie_id AND t.id = mc.movie_id AND mk.movie_id = mi.movie_id AND mk.movie_id = mc.movie_id AND mk.movie_id = at.movie_id AND mi.movie_id = mc.movie_id AND mi.movie_id = at.movie_id AND mc.movie_id = at.movie_id AND k.id = mk.keyword_id AND it1.id = mi.info_type_id AND cn.id = mc.company_id AND ct.id = mc.company_type_id; 2 | -------------------------------------------------------------------------------- /IMDB-JOB/15b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(mi.info) AS release_date, MIN(t.title) AS youtube_movie FROM aka_title AS at, company_name AS cn, company_type AS ct, info_type AS it1, keyword AS k, movie_companies AS mc, movie_info AS mi, movie_keyword AS mk, title AS t WHERE cn.country_code = '[us]' and cn.name = 'YouTube' AND it1.info = 'release dates' AND mc.note like '%(200%)%' and mc.note like '%(worldwide)%' AND mi.note like '%internet%' AND 
mi.info like 'USA:% 200%' AND t.production_year between 2005 and 2010 AND t.id = at.movie_id AND t.id = mi.movie_id AND t.id = mk.movie_id AND t.id = mc.movie_id AND mk.movie_id = mi.movie_id AND mk.movie_id = mc.movie_id AND mk.movie_id = at.movie_id AND mi.movie_id = mc.movie_id AND mi.movie_id = at.movie_id AND mc.movie_id = at.movie_id AND k.id = mk.keyword_id AND it1.id = mi.info_type_id AND cn.id = mc.company_id AND ct.id = mc.company_type_id; 2 | -------------------------------------------------------------------------------- /IMDB-JOB/15c.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(mi.info) AS release_date, MIN(t.title) AS modern_american_internet_movie FROM aka_title AS at, company_name AS cn, company_type AS ct, info_type AS it1, keyword AS k, movie_companies AS mc, movie_info AS mi, movie_keyword AS mk, title AS t WHERE cn.country_code = '[us]' AND it1.info = 'release dates' AND mi.note like '%internet%' AND mi.info is not NULL and (mi.info like 'USA:% 199%' or mi.info like 'USA:% 200%') AND t.production_year > 1990 AND t.id = at.movie_id AND t.id = mi.movie_id AND t.id = mk.movie_id AND t.id = mc.movie_id AND mk.movie_id = mi.movie_id AND mk.movie_id = mc.movie_id AND mk.movie_id = at.movie_id AND mi.movie_id = mc.movie_id AND mi.movie_id = at.movie_id AND mc.movie_id = at.movie_id AND k.id = mk.keyword_id AND it1.id = mi.info_type_id AND cn.id = mc.company_id AND ct.id = mc.company_type_id; 2 | -------------------------------------------------------------------------------- /IMDB-JOB/15d.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(at.title) AS aka_title, MIN(t.title) AS internet_movie_title FROM aka_title AS at, company_name AS cn, company_type AS ct, info_type AS it1, keyword AS k, movie_companies AS mc, movie_info AS mi, movie_keyword AS mk, title AS t WHERE cn.country_code = '[us]' AND it1.info = 'release dates' AND mi.note like 
'%internet%' AND t.production_year > 1990 AND t.id = at.movie_id AND t.id = mi.movie_id AND t.id = mk.movie_id AND t.id = mc.movie_id AND mk.movie_id = mi.movie_id AND mk.movie_id = mc.movie_id AND mk.movie_id = at.movie_id AND mi.movie_id = mc.movie_id AND mi.movie_id = at.movie_id AND mc.movie_id = at.movie_id AND k.id = mk.keyword_id AND it1.id = mi.info_type_id AND cn.id = mc.company_id AND ct.id = mc.company_type_id; 2 | -------------------------------------------------------------------------------- /IMDB-JOB/16a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(an.name) AS cool_actor_pseudonym, MIN(t.title) AS series_named_after_char FROM aka_name AS an, cast_info AS ci, company_name AS cn, keyword AS k, movie_companies AS mc, movie_keyword AS mk, name AS n, title AS t WHERE cn.country_code ='[us]' AND k.keyword ='character-name-in-title' AND t.episode_nr >= 50 AND t.episode_nr < 100 AND an.person_id = n.id AND n.id = ci.person_id AND ci.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND t.id = mc.movie_id AND mc.company_id = cn.id AND an.person_id = ci.person_id AND ci.movie_id = mc.movie_id AND ci.movie_id = mk.movie_id AND mc.movie_id = mk.movie_id; 2 | -------------------------------------------------------------------------------- /IMDB-JOB/16b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(an.name) AS cool_actor_pseudonym, MIN(t.title) AS series_named_after_char FROM aka_name AS an, cast_info AS ci, company_name AS cn, keyword AS k, movie_companies AS mc, movie_keyword AS mk, name AS n, title AS t WHERE cn.country_code ='[us]' AND k.keyword ='character-name-in-title' AND an.person_id = n.id AND n.id = ci.person_id AND ci.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND t.id = mc.movie_id AND mc.company_id = cn.id AND an.person_id = ci.person_id AND ci.movie_id = mc.movie_id AND ci.movie_id = mk.movie_id AND 
mc.movie_id = mk.movie_id; 2 | -------------------------------------------------------------------------------- /IMDB-JOB/16c.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(an.name) AS cool_actor_pseudonym, MIN(t.title) AS series_named_after_char FROM aka_name AS an, cast_info AS ci, company_name AS cn, keyword AS k, movie_companies AS mc, movie_keyword AS mk, name AS n, title AS t WHERE cn.country_code ='[us]' AND k.keyword ='character-name-in-title' AND t.episode_nr < 100 AND an.person_id = n.id AND n.id = ci.person_id AND ci.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND t.id = mc.movie_id AND mc.company_id = cn.id AND an.person_id = ci.person_id AND ci.movie_id = mc.movie_id AND ci.movie_id = mk.movie_id AND mc.movie_id = mk.movie_id; 2 | -------------------------------------------------------------------------------- /IMDB-JOB/16d.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(an.name) AS cool_actor_pseudonym, MIN(t.title) AS series_named_after_char FROM aka_name AS an, cast_info AS ci, company_name AS cn, keyword AS k, movie_companies AS mc, movie_keyword AS mk, name AS n, title AS t WHERE cn.country_code ='[us]' AND k.keyword ='character-name-in-title' AND t.episode_nr >= 5 AND t.episode_nr < 100 AND an.person_id = n.id AND n.id = ci.person_id AND ci.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND t.id = mc.movie_id AND mc.company_id = cn.id AND an.person_id = ci.person_id AND ci.movie_id = mc.movie_id AND ci.movie_id = mk.movie_id AND mc.movie_id = mk.movie_id; 2 | -------------------------------------------------------------------------------- /IMDB-JOB/17a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(n.name) AS member_in_charnamed_american_movie, MIN(n.name) AS a1 FROM cast_info AS ci, company_name AS cn, keyword AS k, movie_companies AS mc, 
movie_keyword AS mk, name AS n, title AS t WHERE cn.country_code ='[us]' AND k.keyword ='character-name-in-title' AND n.name LIKE 'B%' AND n.id = ci.person_id AND ci.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND t.id = mc.movie_id AND mc.company_id = cn.id AND ci.movie_id = mc.movie_id AND ci.movie_id = mk.movie_id AND mc.movie_id = mk.movie_id; 2 | -------------------------------------------------------------------------------- /IMDB-JOB/17b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(n.name) AS member_in_charnamed_movie, MIN(n.name) AS a1 FROM cast_info AS ci, company_name AS cn, keyword AS k, movie_companies AS mc, movie_keyword AS mk, name AS n, title AS t WHERE k.keyword ='character-name-in-title' AND n.name LIKE 'Z%' AND n.id = ci.person_id AND ci.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND t.id = mc.movie_id AND mc.company_id = cn.id AND ci.movie_id = mc.movie_id AND ci.movie_id = mk.movie_id AND mc.movie_id = mk.movie_id; 2 | -------------------------------------------------------------------------------- /IMDB-JOB/17c.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(n.name) AS member_in_charnamed_movie, MIN(n.name) AS a1 FROM cast_info AS ci, company_name AS cn, keyword AS k, movie_companies AS mc, movie_keyword AS mk, name AS n, title AS t WHERE k.keyword ='character-name-in-title' AND n.name LIKE 'X%' AND n.id = ci.person_id AND ci.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND t.id = mc.movie_id AND mc.company_id = cn.id AND ci.movie_id = mc.movie_id AND ci.movie_id = mk.movie_id AND mc.movie_id = mk.movie_id; 2 | -------------------------------------------------------------------------------- /IMDB-JOB/17d.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(n.name) AS member_in_charnamed_movie FROM cast_info AS ci, company_name AS 
cn, keyword AS k, movie_companies AS mc, movie_keyword AS mk, name AS n, title AS t WHERE k.keyword ='character-name-in-title' AND n.name LIKE '%Bert%' AND n.id = ci.person_id AND ci.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND t.id = mc.movie_id AND mc.company_id = cn.id AND ci.movie_id = mc.movie_id AND ci.movie_id = mk.movie_id AND mc.movie_id = mk.movie_id; 2 | -------------------------------------------------------------------------------- /IMDB-JOB/17e.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(n.name) AS member_in_charnamed_movie FROM cast_info AS ci, company_name AS cn, keyword AS k, movie_companies AS mc, movie_keyword AS mk, name AS n, title AS t WHERE cn.country_code ='[us]' AND k.keyword ='character-name-in-title' AND n.id = ci.person_id AND ci.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND t.id = mc.movie_id AND mc.company_id = cn.id AND ci.movie_id = mc.movie_id AND ci.movie_id = mk.movie_id AND mc.movie_id = mk.movie_id; 2 | -------------------------------------------------------------------------------- /IMDB-JOB/17f.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(n.name) AS member_in_charnamed_movie FROM cast_info AS ci, company_name AS cn, keyword AS k, movie_companies AS mc, movie_keyword AS mk, name AS n, title AS t WHERE k.keyword ='character-name-in-title' AND n.name LIKE '%B%' AND n.id = ci.person_id AND ci.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND t.id = mc.movie_id AND mc.company_id = cn.id AND ci.movie_id = mc.movie_id AND ci.movie_id = mk.movie_id AND mc.movie_id = mk.movie_id; 2 | -------------------------------------------------------------------------------- /IMDB-JOB/18a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(mi.info) AS movie_budget, MIN(mi_idx.info) AS movie_votes, MIN(t.title) AS movie_title 
FROM cast_info AS ci, info_type AS it1, info_type AS it2, movie_info AS mi, movie_info_idx AS mi_idx, name AS n, title AS t WHERE ci.note in ('(producer)', '(executive producer)') AND it1.info = 'budget' AND it2.info = 'votes' AND n.gender = 'm' and n.name like '%Tim%' AND t.id = mi.movie_id AND t.id = mi_idx.movie_id AND t.id = ci.movie_id AND ci.movie_id = mi.movie_id AND ci.movie_id = mi_idx.movie_id AND mi.movie_id = mi_idx.movie_id AND n.id = ci.person_id AND it1.id = mi.info_type_id AND it2.id = mi_idx.info_type_id; 2 | -------------------------------------------------------------------------------- /IMDB-JOB/18b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(mi.info) AS movie_budget, MIN(mi_idx.info) AS movie_votes, MIN(t.title) AS movie_title FROM cast_info AS ci, info_type AS it1, info_type AS it2, movie_info AS mi, movie_info_idx AS mi_idx, name AS n, title AS t WHERE ci.note in ('(writer)', '(head writer)', '(written by)', '(story)', '(story editor)') AND it1.info = 'genres' AND it2.info = 'rating' AND mi.info in ('Horror', 'Thriller') and mi.note is NULL AND mi_idx.info > '8.0' AND n.gender is not null and n.gender = 'f' AND t.production_year between 2008 and 2014 AND t.id = mi.movie_id AND t.id = mi_idx.movie_id AND t.id = ci.movie_id AND ci.movie_id = mi.movie_id AND ci.movie_id = mi_idx.movie_id AND mi.movie_id = mi_idx.movie_id AND n.id = ci.person_id AND it1.id = mi.info_type_id AND it2.id = mi_idx.info_type_id; 2 | -------------------------------------------------------------------------------- /IMDB-JOB/18c.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(mi.info) AS movie_budget, MIN(mi_idx.info) AS movie_votes, MIN(t.title) AS movie_title FROM cast_info AS ci, info_type AS it1, info_type AS it2, movie_info AS mi, movie_info_idx AS mi_idx, name AS n, title AS t WHERE ci.note in ('(writer)', '(head writer)', '(written by)', '(story)', '(story 
editor)') AND it1.info = 'genres' AND it2.info = 'votes' AND mi.info in ('Horror', 'Action', 'Sci-Fi', 'Thriller', 'Crime', 'War') AND n.gender = 'm' AND t.id = mi.movie_id AND t.id = mi_idx.movie_id AND t.id = ci.movie_id AND ci.movie_id = mi.movie_id AND ci.movie_id = mi_idx.movie_id AND mi.movie_id = mi_idx.movie_id AND n.id = ci.person_id AND it1.id = mi.info_type_id AND it2.id = mi_idx.info_type_id; 2 | -------------------------------------------------------------------------------- /IMDB-JOB/19a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(n.name) AS voicing_actress, MIN(t.title) AS voiced_movie FROM aka_name AS an, char_name AS chn, cast_info AS ci, company_name AS cn, info_type AS it, movie_companies AS mc, movie_info AS mi, name AS n, role_type AS rt, title AS t WHERE ci.note in ('(voice)', '(voice: Japanese version)', '(voice) (uncredited)', '(voice: English version)') AND cn.country_code ='[us]' AND it.info = 'release dates' AND mc.note is not NULL and (mc.note like '%(USA)%' or mc.note like '%(worldwide)%') AND mi.info is not null and (mi.info like 'Japan:%200%' or mi.info like 'USA:%200%') AND n.gender ='f' and n.name like '%Ang%' AND rt.role ='actress' AND t.production_year between 2005 and 2009 AND t.id = mi.movie_id AND t.id = mc.movie_id AND t.id = ci.movie_id AND mc.movie_id = ci.movie_id AND mc.movie_id = mi.movie_id AND mi.movie_id = ci.movie_id AND cn.id = mc.company_id AND it.id = mi.info_type_id AND n.id = ci.person_id AND rt.id = ci.role_id AND n.id = an.person_id AND ci.person_id = an.person_id AND chn.id = ci.person_role_id; 2 | -------------------------------------------------------------------------------- /IMDB-JOB/19b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(n.name) AS voicing_actress, MIN(t.title) AS kung_fu_panda FROM aka_name AS an, char_name AS chn, cast_info AS ci, company_name AS cn, info_type AS it, movie_companies 
AS mc, movie_info AS mi, name AS n, role_type AS rt, title AS t WHERE ci.note = '(voice)' AND cn.country_code ='[us]' AND it.info = 'release dates' AND mc.note like '%(200%)%' and (mc.note like '%(USA)%' or mc.note like '%(worldwide)%') AND mi.info is not null and (mi.info like 'Japan:%2007%' or mi.info like 'USA:%2008%') AND n.gender ='f' and n.name like '%Angel%' AND rt.role ='actress' AND t.production_year between 2007 and 2008 and t.title like '%Kung%Fu%Panda%' AND t.id = mi.movie_id AND t.id = mc.movie_id AND t.id = ci.movie_id AND mc.movie_id = ci.movie_id AND mc.movie_id = mi.movie_id AND mi.movie_id = ci.movie_id AND cn.id = mc.company_id AND it.id = mi.info_type_id AND n.id = ci.person_id AND rt.id = ci.role_id AND n.id = an.person_id AND ci.person_id = an.person_id AND chn.id = ci.person_role_id; 2 | -------------------------------------------------------------------------------- /IMDB-JOB/19c.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(n.name) AS voicing_actress, MIN(t.title) AS jap_engl_voiced_movie FROM aka_name AS an, char_name AS chn, cast_info AS ci, company_name AS cn, info_type AS it, movie_companies AS mc, movie_info AS mi, name AS n, role_type AS rt, title AS t WHERE ci.note in ('(voice)', '(voice: Japanese version)', '(voice) (uncredited)', '(voice: English version)') AND cn.country_code ='[us]' AND it.info = 'release dates' AND mi.info is not null and (mi.info like 'Japan:%200%' or mi.info like 'USA:%200%') AND n.gender ='f' and n.name like '%An%' AND rt.role ='actress' AND t.production_year > 2000 AND t.id = mi.movie_id AND t.id = mc.movie_id AND t.id = ci.movie_id AND mc.movie_id = ci.movie_id AND mc.movie_id = mi.movie_id AND mi.movie_id = ci.movie_id AND cn.id = mc.company_id AND it.id = mi.info_type_id AND n.id = ci.person_id AND rt.id = ci.role_id AND n.id = an.person_id AND ci.person_id = an.person_id AND chn.id = ci.person_role_id; 2 | 
-------------------------------------------------------------------------------- /IMDB-JOB/19d.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(n.name) AS voicing_actress, MIN(t.title) AS jap_engl_voiced_movie FROM aka_name AS an, char_name AS chn, cast_info AS ci, company_name AS cn, info_type AS it, movie_companies AS mc, movie_info AS mi, name AS n, role_type AS rt, title AS t WHERE ci.note in ('(voice)', '(voice: Japanese version)', '(voice) (uncredited)', '(voice: English version)') AND cn.country_code ='[us]' AND it.info = 'release dates' AND n.gender ='f' AND rt.role ='actress' AND t.production_year > 2000 AND t.id = mi.movie_id AND t.id = mc.movie_id AND t.id = ci.movie_id AND mc.movie_id = ci.movie_id AND mc.movie_id = mi.movie_id AND mi.movie_id = ci.movie_id AND cn.id = mc.company_id AND it.id = mi.info_type_id AND n.id = ci.person_id AND rt.id = ci.role_id AND n.id = an.person_id AND ci.person_id = an.person_id AND chn.id = ci.person_role_id; 2 | -------------------------------------------------------------------------------- /IMDB-JOB/1a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(mc.note) AS production_note, MIN(t.title) AS movie_title, MIN(t.production_year) AS movie_year FROM company_type AS ct, info_type AS it, movie_companies AS mc, movie_info_idx AS mi_idx, title AS t WHERE ct.kind = 'production companies' AND it.info = 'top 250 rank' AND mc.note not like '%(as Metro-Goldwyn-Mayer Pictures)%' and (mc.note like '%(co-production)%' or mc.note like '%(presents)%') AND ct.id = mc.company_type_id AND t.id = mc.movie_id AND t.id = mi_idx.movie_id AND mc.movie_id = mi_idx.movie_id AND it.id = mi_idx.info_type_id; 2 | -------------------------------------------------------------------------------- /IMDB-JOB/1b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(mc.note) AS production_note, 
MIN(t.title) AS movie_title, MIN(t.production_year) AS movie_year FROM company_type AS ct, info_type AS it, movie_companies AS mc, movie_info_idx AS mi_idx, title AS t WHERE ct.kind = 'production companies' AND it.info = 'bottom 10 rank' AND mc.note not like '%(as Metro-Goldwyn-Mayer Pictures)%' AND t.production_year between 2005 and 2010 AND ct.id = mc.company_type_id AND t.id = mc.movie_id AND t.id = mi_idx.movie_id AND mc.movie_id = mi_idx.movie_id AND it.id = mi_idx.info_type_id; 2 | -------------------------------------------------------------------------------- /IMDB-JOB/1c.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(mc.note) AS production_note, MIN(t.title) AS movie_title, MIN(t.production_year) AS movie_year FROM company_type AS ct, info_type AS it, movie_companies AS mc, movie_info_idx AS mi_idx, title AS t WHERE ct.kind = 'production companies' AND it.info = 'top 250 rank' AND mc.note not like '%(as Metro-Goldwyn-Mayer Pictures)%' and (mc.note like '%(co-production)%') AND t.production_year >2010 AND ct.id = mc.company_type_id AND t.id = mc.movie_id AND t.id = mi_idx.movie_id AND mc.movie_id = mi_idx.movie_id AND it.id = mi_idx.info_type_id; 2 | -------------------------------------------------------------------------------- /IMDB-JOB/1d.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(mc.note) AS production_note, MIN(t.title) AS movie_title, MIN(t.production_year) AS movie_year FROM company_type AS ct, info_type AS it, movie_companies AS mc, movie_info_idx AS mi_idx, title AS t WHERE ct.kind = 'production companies' AND it.info = 'bottom 10 rank' AND mc.note not like '%(as Metro-Goldwyn-Mayer Pictures)%' AND t.production_year >2000 AND ct.id = mc.company_type_id AND t.id = mc.movie_id AND t.id = mi_idx.movie_id AND mc.movie_id = mi_idx.movie_id AND it.id = mi_idx.info_type_id; 2 | 
-------------------------------------------------------------------------------- /IMDB-JOB/20a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(t.title) AS complete_downey_ironman_movie FROM complete_cast AS cc, comp_cast_type AS cct1, comp_cast_type AS cct2, char_name AS chn, cast_info AS ci, keyword AS k, kind_type AS kt, movie_keyword AS mk, name AS n, title AS t WHERE cct1.kind = 'cast' AND cct2.kind like '%complete%' AND chn.name not like '%Sherlock%' and (chn.name like '%Tony%Stark%' or chn.name like '%Iron%Man%') AND k.keyword in ('superhero', 'sequel', 'second-part', 'marvel-comics', 'based-on-comic', 'tv-special', 'fight', 'violence') AND kt.kind = 'movie' AND t.production_year > 1950 AND kt.id = t.kind_id AND t.id = mk.movie_id AND t.id = ci.movie_id AND t.id = cc.movie_id AND mk.movie_id = ci.movie_id AND mk.movie_id = cc.movie_id AND ci.movie_id = cc.movie_id AND chn.id = ci.person_role_id AND n.id = ci.person_id AND k.id = mk.keyword_id AND cct1.id = cc.subject_id AND cct2.id = cc.status_id; 2 | -------------------------------------------------------------------------------- /IMDB-JOB/20b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(t.title) AS complete_downey_ironman_movie FROM complete_cast AS cc, comp_cast_type AS cct1, comp_cast_type AS cct2, char_name AS chn, cast_info AS ci, keyword AS k, kind_type AS kt, movie_keyword AS mk, name AS n, title AS t WHERE cct1.kind = 'cast' AND cct2.kind like '%complete%' AND chn.name not like '%Sherlock%' and (chn.name like '%Tony%Stark%' or chn.name like '%Iron%Man%') AND k.keyword in ('superhero', 'sequel', 'second-part', 'marvel-comics', 'based-on-comic', 'tv-special', 'fight', 'violence') AND kt.kind = 'movie' AND n.name LIKE '%Downey%Robert%' AND t.production_year > 2000 AND kt.id = t.kind_id AND t.id = mk.movie_id AND t.id = ci.movie_id AND t.id = cc.movie_id AND mk.movie_id = ci.movie_id AND 
mk.movie_id = cc.movie_id AND ci.movie_id = cc.movie_id AND chn.id = ci.person_role_id AND n.id = ci.person_id AND k.id = mk.keyword_id AND cct1.id = cc.subject_id AND cct2.id = cc.status_id; 2 | -------------------------------------------------------------------------------- /IMDB-JOB/20c.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(n.name) AS cast_member, MIN(t.title) AS complete_dynamic_hero_movie FROM complete_cast AS cc, comp_cast_type AS cct1, comp_cast_type AS cct2, char_name AS chn, cast_info AS ci, keyword AS k, kind_type AS kt, movie_keyword AS mk, name AS n, title AS t WHERE cct1.kind = 'cast' AND cct2.kind like '%complete%' AND chn.name is not NULL and (chn.name like '%man%' or chn.name like '%Man%') AND k.keyword in ('superhero', 'marvel-comics', 'based-on-comic', 'tv-special', 'fight', 'violence', 'magnet', 'web', 'claw', 'laser') AND kt.kind = 'movie' AND t.production_year > 2000 AND kt.id = t.kind_id AND t.id = mk.movie_id AND t.id = ci.movie_id AND t.id = cc.movie_id AND mk.movie_id = ci.movie_id AND mk.movie_id = cc.movie_id AND ci.movie_id = cc.movie_id AND chn.id = ci.person_role_id AND n.id = ci.person_id AND k.id = mk.keyword_id AND cct1.id = cc.subject_id AND cct2.id = cc.status_id; 2 | -------------------------------------------------------------------------------- /IMDB-JOB/21a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(cn.name) AS company_name, MIN(lt.link) AS link_type, MIN(t.title) AS western_follow_up FROM company_name AS cn, company_type AS ct, keyword AS k, link_type AS lt, movie_companies AS mc, movie_info AS mi, movie_keyword AS mk, movie_link AS ml, title AS t WHERE cn.country_code !='[pl]' AND (cn.name LIKE '%Film%' OR cn.name LIKE '%Warner%') AND ct.kind ='production companies' AND k.keyword ='sequel' AND lt.link LIKE '%follow%' AND mc.note IS NULL AND mi.info IN ('Sweden', 'Norway', 'Germany', 'Denmark', 'Swedish', 
'Denish', 'Norwegian', 'German') AND t.production_year BETWEEN 1950 AND 2000 AND lt.id = ml.link_type_id AND ml.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND t.id = mc.movie_id AND mc.company_type_id = ct.id AND mc.company_id = cn.id AND mi.movie_id = t.id AND ml.movie_id = mk.movie_id AND ml.movie_id = mc.movie_id AND mk.movie_id = mc.movie_id AND ml.movie_id = mi.movie_id AND mk.movie_id = mi.movie_id AND mc.movie_id = mi.movie_id; 2 | -------------------------------------------------------------------------------- /IMDB-JOB/21b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(cn.name) AS company_name, MIN(lt.link) AS link_type, MIN(t.title) AS german_follow_up FROM company_name AS cn, company_type AS ct, keyword AS k, link_type AS lt, movie_companies AS mc, movie_info AS mi, movie_keyword AS mk, movie_link AS ml, title AS t WHERE cn.country_code !='[pl]' AND (cn.name LIKE '%Film%' OR cn.name LIKE '%Warner%') AND ct.kind ='production companies' AND k.keyword ='sequel' AND lt.link LIKE '%follow%' AND mc.note IS NULL AND mi.info IN ('Germany', 'German') AND t.production_year BETWEEN 2000 AND 2010 AND lt.id = ml.link_type_id AND ml.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND t.id = mc.movie_id AND mc.company_type_id = ct.id AND mc.company_id = cn.id AND mi.movie_id = t.id AND ml.movie_id = mk.movie_id AND ml.movie_id = mc.movie_id AND mk.movie_id = mc.movie_id AND ml.movie_id = mi.movie_id AND mk.movie_id = mi.movie_id AND mc.movie_id = mi.movie_id; 2 | -------------------------------------------------------------------------------- /IMDB-JOB/21c.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(cn.name) AS company_name, MIN(lt.link) AS link_type, MIN(t.title) AS western_follow_up FROM company_name AS cn, company_type AS ct, keyword AS k, link_type AS lt, movie_companies AS mc, movie_info AS mi, movie_keyword AS mk, 
movie_link AS ml, title AS t WHERE cn.country_code !='[pl]' AND (cn.name LIKE '%Film%' OR cn.name LIKE '%Warner%') AND ct.kind ='production companies' AND k.keyword ='sequel' AND lt.link LIKE '%follow%' AND mc.note IS NULL AND mi.info IN ('Sweden', 'Norway', 'Germany', 'Denmark', 'Swedish', 'Denish', 'Norwegian', 'German', 'English') AND t.production_year BETWEEN 1950 AND 2010 AND lt.id = ml.link_type_id AND ml.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND t.id = mc.movie_id AND mc.company_type_id = ct.id AND mc.company_id = cn.id AND mi.movie_id = t.id AND ml.movie_id = mk.movie_id AND ml.movie_id = mc.movie_id AND mk.movie_id = mc.movie_id AND ml.movie_id = mi.movie_id AND mk.movie_id = mi.movie_id AND mc.movie_id = mi.movie_id; 2 | -------------------------------------------------------------------------------- /IMDB-JOB/22a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(cn.name) AS movie_company, MIN(mi_idx.info) AS rating, MIN(t.title) AS western_violent_movie FROM company_name AS cn, company_type AS ct, info_type AS it1, info_type AS it2, keyword AS k, kind_type AS kt, movie_companies AS mc, movie_info AS mi, movie_info_idx AS mi_idx, movie_keyword AS mk, title AS t WHERE cn.country_code != '[us]' AND it1.info = 'countries' AND it2.info = 'rating' AND k.keyword in ('murder', 'murder-in-title', 'blood', 'violence') AND kt.kind in ('movie', 'episode') AND mc.note not like '%(USA)%' and mc.note like '%(200%)%' AND mi.info IN ('Germany', 'German', 'USA', 'American') AND mi_idx.info < '7.0' AND t.production_year > 2008 AND kt.id = t.kind_id AND t.id = mi.movie_id AND t.id = mk.movie_id AND t.id = mi_idx.movie_id AND t.id = mc.movie_id AND mk.movie_id = mi.movie_id AND mk.movie_id = mi_idx.movie_id AND mk.movie_id = mc.movie_id AND mi.movie_id = mi_idx.movie_id AND mi.movie_id = mc.movie_id AND mc.movie_id = mi_idx.movie_id AND k.id = mk.keyword_id AND it1.id = mi.info_type_id AND it2.id = 
mi_idx.info_type_id AND ct.id = mc.company_type_id AND cn.id = mc.company_id; 2 | -------------------------------------------------------------------------------- /IMDB-JOB/22b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(cn.name) AS movie_company, MIN(mi_idx.info) AS rating, MIN(t.title) AS western_violent_movie FROM company_name AS cn, company_type AS ct, info_type AS it1, info_type AS it2, keyword AS k, kind_type AS kt, movie_companies AS mc, movie_info AS mi, movie_info_idx AS mi_idx, movie_keyword AS mk, title AS t WHERE cn.country_code != '[us]' AND it1.info = 'countries' AND it2.info = 'rating' AND k.keyword in ('murder', 'murder-in-title', 'blood', 'violence') AND kt.kind in ('movie', 'episode') AND mc.note not like '%(USA)%' and mc.note like '%(200%)%' AND mi.info IN ('Germany', 'German', 'USA', 'American') AND mi_idx.info < '7.0' AND t.production_year > 2009 AND kt.id = t.kind_id AND t.id = mi.movie_id AND t.id = mk.movie_id AND t.id = mi_idx.movie_id AND t.id = mc.movie_id AND mk.movie_id = mi.movie_id AND mk.movie_id = mi_idx.movie_id AND mk.movie_id = mc.movie_id AND mi.movie_id = mi_idx.movie_id AND mi.movie_id = mc.movie_id AND mc.movie_id = mi_idx.movie_id AND k.id = mk.keyword_id AND it1.id = mi.info_type_id AND it2.id = mi_idx.info_type_id AND ct.id = mc.company_type_id AND cn.id = mc.company_id; 2 | -------------------------------------------------------------------------------- /IMDB-JOB/22c.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(cn.name) AS movie_company, MIN(mi_idx.info) AS rating, MIN(t.title) AS western_violent_movie FROM company_name AS cn, company_type AS ct, info_type AS it1, info_type AS it2, keyword AS k, kind_type AS kt, movie_companies AS mc, movie_info AS mi, movie_info_idx AS mi_idx, movie_keyword AS mk, title AS t WHERE cn.country_code != '[us]' AND it1.info = 'countries' AND it2.info = 'rating' AND k.keyword in 
('murder', 'murder-in-title', 'blood', 'violence') AND kt.kind in ('movie', 'episode') AND mc.note not like '%(USA)%' and mc.note like '%(200%)%' AND mi.info IN ('Sweden', 'Norway', 'Germany', 'Denmark', 'Swedish', 'Danish', 'Norwegian', 'German', 'USA', 'American') AND mi_idx.info < '8.5' AND t.production_year > 2005 AND kt.id = t.kind_id AND t.id = mi.movie_id AND t.id = mk.movie_id AND t.id = mi_idx.movie_id AND t.id = mc.movie_id AND mk.movie_id = mi.movie_id AND mk.movie_id = mi_idx.movie_id AND mk.movie_id = mc.movie_id AND mi.movie_id = mi_idx.movie_id AND mi.movie_id = mc.movie_id AND mc.movie_id = mi_idx.movie_id AND k.id = mk.keyword_id AND it1.id = mi.info_type_id AND it2.id = mi_idx.info_type_id AND ct.id = mc.company_type_id AND cn.id = mc.company_id; 2 | -------------------------------------------------------------------------------- /IMDB-JOB/22d.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(cn.name) AS movie_company, MIN(mi_idx.info) AS rating, MIN(t.title) AS western_violent_movie FROM company_name AS cn, company_type AS ct, info_type AS it1, info_type AS it2, keyword AS k, kind_type AS kt, movie_companies AS mc, movie_info AS mi, movie_info_idx AS mi_idx, movie_keyword AS mk, title AS t WHERE cn.country_code != '[us]' AND it1.info = 'countries' AND it2.info = 'rating' AND k.keyword in ('murder', 'murder-in-title', 'blood', 'violence') AND kt.kind in ('movie', 'episode') AND mi.info IN ('Sweden', 'Norway', 'Germany', 'Denmark', 'Swedish', 'Danish', 'Norwegian', 'German', 'USA', 'American') AND mi_idx.info < '8.5' AND t.production_year > 2005 AND kt.id = t.kind_id AND t.id = mi.movie_id AND t.id = mk.movie_id AND t.id = mi_idx.movie_id AND t.id = mc.movie_id AND mk.movie_id = mi.movie_id AND mk.movie_id = mi_idx.movie_id AND mk.movie_id = mc.movie_id AND mi.movie_id = mi_idx.movie_id AND mi.movie_id = mc.movie_id AND mc.movie_id = mi_idx.movie_id AND k.id = mk.keyword_id AND it1.id = 
mi.info_type_id AND it2.id = mi_idx.info_type_id AND ct.id = mc.company_type_id AND cn.id = mc.company_id; 2 | -------------------------------------------------------------------------------- /IMDB-JOB/23a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(kt.kind) AS movie_kind, MIN(t.title) AS complete_us_internet_movie FROM complete_cast AS cc, comp_cast_type AS cct1, company_name AS cn, company_type AS ct, info_type AS it1, keyword AS k, kind_type AS kt, movie_companies AS mc, movie_info AS mi, movie_keyword AS mk, title AS t WHERE cct1.kind = 'complete+verified' AND cn.country_code = '[us]' AND it1.info = 'release dates' AND kt.kind in ('movie') AND mi.note like '%internet%' AND mi.info is not NULL and (mi.info like 'USA:% 199%' or mi.info like 'USA:% 200%') AND t.production_year > 2000 AND kt.id = t.kind_id AND t.id = mi.movie_id AND t.id = mk.movie_id AND t.id = mc.movie_id AND t.id = cc.movie_id AND mk.movie_id = mi.movie_id AND mk.movie_id = mc.movie_id AND mk.movie_id = cc.movie_id AND mi.movie_id = mc.movie_id AND mi.movie_id = cc.movie_id AND mc.movie_id = cc.movie_id AND k.id = mk.keyword_id AND it1.id = mi.info_type_id AND cn.id = mc.company_id AND ct.id = mc.company_type_id AND cct1.id = cc.status_id; 2 | -------------------------------------------------------------------------------- /IMDB-JOB/23b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(kt.kind) AS movie_kind, MIN(t.title) AS complete_nerdy_internet_movie FROM complete_cast AS cc, comp_cast_type AS cct1, company_name AS cn, company_type AS ct, info_type AS it1, keyword AS k, kind_type AS kt, movie_companies AS mc, movie_info AS mi, movie_keyword AS mk, title AS t WHERE cct1.kind = 'complete+verified' AND cn.country_code = '[us]' AND it1.info = 'release dates' AND k.keyword in ('nerd', 'loner', 'alienation', 'dignity') AND kt.kind in ('movie') AND mi.note like '%internet%' AND mi.info like 
'USA:% 200%' AND t.production_year > 2000 AND kt.id = t.kind_id AND t.id = mi.movie_id AND t.id = mk.movie_id AND t.id = mc.movie_id AND t.id = cc.movie_id AND mk.movie_id = mi.movie_id AND mk.movie_id = mc.movie_id AND mk.movie_id = cc.movie_id AND mi.movie_id = mc.movie_id AND mi.movie_id = cc.movie_id AND mc.movie_id = cc.movie_id AND k.id = mk.keyword_id AND it1.id = mi.info_type_id AND cn.id = mc.company_id AND ct.id = mc.company_type_id AND cct1.id = cc.status_id; 2 | -------------------------------------------------------------------------------- /IMDB-JOB/23c.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(kt.kind) AS movie_kind, MIN(t.title) AS complete_us_internet_movie FROM complete_cast AS cc, comp_cast_type AS cct1, company_name AS cn, company_type AS ct, info_type AS it1, keyword AS k, kind_type AS kt, movie_companies AS mc, movie_info AS mi, movie_keyword AS mk, title AS t WHERE cct1.kind = 'complete+verified' AND cn.country_code = '[us]' AND it1.info = 'release dates' AND kt.kind in ('movie', 'tv movie', 'video movie', 'video game') AND mi.note like '%internet%' AND mi.info is not NULL and (mi.info like 'USA:% 199%' or mi.info like 'USA:% 200%') AND t.production_year > 1990 AND kt.id = t.kind_id AND t.id = mi.movie_id AND t.id = mk.movie_id AND t.id = mc.movie_id AND t.id = cc.movie_id AND mk.movie_id = mi.movie_id AND mk.movie_id = mc.movie_id AND mk.movie_id = cc.movie_id AND mi.movie_id = mc.movie_id AND mi.movie_id = cc.movie_id AND mc.movie_id = cc.movie_id AND k.id = mk.keyword_id AND it1.id = mi.info_type_id AND cn.id = mc.company_id AND ct.id = mc.company_type_id AND cct1.id = cc.status_id; 2 | -------------------------------------------------------------------------------- /IMDB-JOB/24a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(chn.name) AS voiced_char_name, MIN(n.name) AS voicing_actress_name, MIN(t.title) AS 
voiced_action_movie_jap_eng FROM aka_name AS an, char_name AS chn, cast_info AS ci, company_name AS cn, info_type AS it, keyword AS k, movie_companies AS mc, movie_info AS mi, movie_keyword AS mk, name AS n, role_type AS rt, title AS t WHERE ci.note in ('(voice)', '(voice: Japanese version)', '(voice) (uncredited)', '(voice: English version)') AND cn.country_code ='[us]' AND it.info = 'release dates' AND k.keyword in ('hero', 'martial-arts', 'hand-to-hand-combat') AND mi.info is not null and (mi.info like 'Japan:%201%' or mi.info like 'USA:%201%') AND n.gender ='f' and n.name like '%An%' AND rt.role ='actress' AND t.production_year > 2010 AND t.id = mi.movie_id AND t.id = mc.movie_id AND t.id = ci.movie_id AND t.id = mk.movie_id AND mc.movie_id = ci.movie_id AND mc.movie_id = mi.movie_id AND mc.movie_id = mk.movie_id AND mi.movie_id = ci.movie_id AND mi.movie_id = mk.movie_id AND ci.movie_id = mk.movie_id AND cn.id = mc.company_id AND it.id = mi.info_type_id AND n.id = ci.person_id AND rt.id = ci.role_id AND n.id = an.person_id AND ci.person_id = an.person_id AND chn.id = ci.person_role_id AND k.id = mk.keyword_id; 2 | -------------------------------------------------------------------------------- /IMDB-JOB/24b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(chn.name) AS voiced_char_name, MIN(n.name) AS voicing_actress_name, MIN(t.title) AS kung_fu_panda FROM aka_name AS an, char_name AS chn, cast_info AS ci, company_name AS cn, info_type AS it, keyword AS k, movie_companies AS mc, movie_info AS mi, movie_keyword AS mk, name AS n, role_type AS rt, title AS t WHERE ci.note in ('(voice)', '(voice: Japanese version)', '(voice) (uncredited)', '(voice: English version)') AND cn.country_code ='[us]' AND cn.name = 'DreamWorks Animation' AND it.info = 'release dates' AND k.keyword in ('hero', 'martial-arts', 'hand-to-hand-combat', 'computer-animated-movie') AND mi.info is not null and (mi.info like 'Japan:%201%' or 
mi.info like 'USA:%201%') AND n.gender ='f' and n.name like '%An%' AND rt.role ='actress' AND t.production_year > 2010 AND t.title like 'Kung Fu Panda%' AND t.id = mi.movie_id AND t.id = mc.movie_id AND t.id = ci.movie_id AND t.id = mk.movie_id AND mc.movie_id = ci.movie_id AND mc.movie_id = mi.movie_id AND mc.movie_id = mk.movie_id AND mi.movie_id = ci.movie_id AND mi.movie_id = mk.movie_id AND ci.movie_id = mk.movie_id AND cn.id = mc.company_id AND it.id = mi.info_type_id AND n.id = ci.person_id AND rt.id = ci.role_id AND n.id = an.person_id AND ci.person_id = an.person_id AND chn.id = ci.person_role_id AND k.id = mk.keyword_id; 2 | -------------------------------------------------------------------------------- /IMDB-JOB/25a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(mi.info) AS movie_budget, MIN(mi_idx.info) AS movie_votes, MIN(n.name) AS male_writer, MIN(t.title) AS violent_movie_title FROM cast_info AS ci, info_type AS it1, info_type AS it2, keyword AS k, movie_info AS mi, movie_info_idx AS mi_idx, movie_keyword AS mk, name AS n, title AS t WHERE ci.note in ('(writer)', '(head writer)', '(written by)', '(story)', '(story editor)') AND it1.info = 'genres' AND it2.info = 'votes' AND k.keyword in ('murder', 'blood', 'gore', 'death', 'female-nudity') AND mi.info = 'Horror' AND n.gender = 'm' AND t.id = mi.movie_id AND t.id = mi_idx.movie_id AND t.id = ci.movie_id AND t.id = mk.movie_id AND ci.movie_id = mi.movie_id AND ci.movie_id = mi_idx.movie_id AND ci.movie_id = mk.movie_id AND mi.movie_id = mi_idx.movie_id AND mi.movie_id = mk.movie_id AND mi_idx.movie_id = mk.movie_id AND n.id = ci.person_id AND it1.id = mi.info_type_id AND it2.id = mi_idx.info_type_id AND k.id = mk.keyword_id; 2 | -------------------------------------------------------------------------------- /IMDB-JOB/25b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(mi.info) AS movie_budget, 
MIN(mi_idx.info) AS movie_votes, MIN(n.name) AS male_writer, MIN(t.title) AS violent_movie_title FROM cast_info AS ci, info_type AS it1, info_type AS it2, keyword AS k, movie_info AS mi, movie_info_idx AS mi_idx, movie_keyword AS mk, name AS n, title AS t WHERE ci.note in ('(writer)', '(head writer)', '(written by)', '(story)', '(story editor)') AND it1.info = 'genres' AND it2.info = 'votes' AND k.keyword in ('murder', 'blood', 'gore', 'death', 'female-nudity') AND mi.info = 'Horror' AND n.gender = 'm' AND t.production_year > 2010 AND t.title like 'Vampire%' AND t.id = mi.movie_id AND t.id = mi_idx.movie_id AND t.id = ci.movie_id AND t.id = mk.movie_id AND ci.movie_id = mi.movie_id AND ci.movie_id = mi_idx.movie_id AND ci.movie_id = mk.movie_id AND mi.movie_id = mi_idx.movie_id AND mi.movie_id = mk.movie_id AND mi_idx.movie_id = mk.movie_id AND n.id = ci.person_id AND it1.id = mi.info_type_id AND it2.id = mi_idx.info_type_id AND k.id = mk.keyword_id; 2 | -------------------------------------------------------------------------------- /IMDB-JOB/25c.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(mi.info) AS movie_budget, MIN(mi_idx.info) AS movie_votes, MIN(n.name) AS male_writer, MIN(t.title) AS violent_movie_title FROM cast_info AS ci, info_type AS it1, info_type AS it2, keyword AS k, movie_info AS mi, movie_info_idx AS mi_idx, movie_keyword AS mk, name AS n, title AS t WHERE ci.note in ('(writer)', '(head writer)', '(written by)', '(story)', '(story editor)') AND it1.info = 'genres' AND it2.info = 'votes' AND k.keyword in ('murder', 'violence', 'blood', 'gore', 'death', 'female-nudity', 'hospital') AND mi.info in ('Horror', 'Action', 'Sci-Fi', 'Thriller', 'Crime', 'War') AND n.gender = 'm' AND t.id = mi.movie_id AND t.id = mi_idx.movie_id AND t.id = ci.movie_id AND t.id = mk.movie_id AND ci.movie_id = mi.movie_id AND ci.movie_id = mi_idx.movie_id AND ci.movie_id = mk.movie_id AND mi.movie_id = mi_idx.movie_id 
AND mi.movie_id = mk.movie_id AND mi_idx.movie_id = mk.movie_id AND n.id = ci.person_id AND it1.id = mi.info_type_id AND it2.id = mi_idx.info_type_id AND k.id = mk.keyword_id; 2 | -------------------------------------------------------------------------------- /IMDB-JOB/26a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(chn.name) AS character_name, MIN(mi_idx.info) AS rating, MIN(n.name) AS playing_actor, MIN(t.title) AS complete_hero_movie FROM complete_cast AS cc, comp_cast_type AS cct1, comp_cast_type AS cct2, char_name AS chn, cast_info AS ci, info_type AS it2, keyword AS k, kind_type AS kt, movie_info_idx AS mi_idx, movie_keyword AS mk, name AS n, title AS t WHERE cct1.kind = 'cast' AND cct2.kind like '%complete%' AND chn.name is not NULL and (chn.name like '%man%' or chn.name like '%Man%') AND it2.info = 'rating' AND k.keyword in ('superhero', 'marvel-comics', 'based-on-comic', 'tv-special', 'fight', 'violence', 'magnet', 'web', 'claw', 'laser') AND kt.kind = 'movie' AND mi_idx.info > '7.0' AND t.production_year > 2000 AND kt.id = t.kind_id AND t.id = mk.movie_id AND t.id = ci.movie_id AND t.id = cc.movie_id AND t.id = mi_idx.movie_id AND mk.movie_id = ci.movie_id AND mk.movie_id = cc.movie_id AND mk.movie_id = mi_idx.movie_id AND ci.movie_id = cc.movie_id AND ci.movie_id = mi_idx.movie_id AND cc.movie_id = mi_idx.movie_id AND chn.id = ci.person_role_id AND n.id = ci.person_id AND k.id = mk.keyword_id AND cct1.id = cc.subject_id AND cct2.id = cc.status_id AND it2.id = mi_idx.info_type_id; 2 | -------------------------------------------------------------------------------- /IMDB-JOB/26b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(chn.name) AS character_name, MIN(mi_idx.info) AS rating, MIN(t.title) AS complete_hero_movie FROM complete_cast AS cc, comp_cast_type AS cct1, comp_cast_type AS cct2, char_name AS chn, cast_info AS ci, info_type AS it2, 
keyword AS k, kind_type AS kt, movie_info_idx AS mi_idx, movie_keyword AS mk, name AS n, title AS t WHERE cct1.kind = 'cast' AND cct2.kind like '%complete%' AND chn.name is not NULL and (chn.name like '%man%' or chn.name like '%Man%') AND it2.info = 'rating' AND k.keyword in ('superhero', 'marvel-comics', 'based-on-comic', 'fight') AND kt.kind = 'movie' AND mi_idx.info > '8.0' AND t.production_year > 2005 AND kt.id = t.kind_id AND t.id = mk.movie_id AND t.id = ci.movie_id AND t.id = cc.movie_id AND t.id = mi_idx.movie_id AND mk.movie_id = ci.movie_id AND mk.movie_id = cc.movie_id AND mk.movie_id = mi_idx.movie_id AND ci.movie_id = cc.movie_id AND ci.movie_id = mi_idx.movie_id AND cc.movie_id = mi_idx.movie_id AND chn.id = ci.person_role_id AND n.id = ci.person_id AND k.id = mk.keyword_id AND cct1.id = cc.subject_id AND cct2.id = cc.status_id AND it2.id = mi_idx.info_type_id; 2 | -------------------------------------------------------------------------------- /IMDB-JOB/26c.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(chn.name) AS character_name, MIN(mi_idx.info) AS rating, MIN(t.title) AS complete_hero_movie FROM complete_cast AS cc, comp_cast_type AS cct1, comp_cast_type AS cct2, char_name AS chn, cast_info AS ci, info_type AS it2, keyword AS k, kind_type AS kt, movie_info_idx AS mi_idx, movie_keyword AS mk, name AS n, title AS t WHERE cct1.kind = 'cast' AND cct2.kind like '%complete%' AND chn.name is not NULL and (chn.name like '%man%' or chn.name like '%Man%') AND it2.info = 'rating' AND k.keyword in ('superhero', 'marvel-comics', 'based-on-comic', 'tv-special', 'fight', 'violence', 'magnet', 'web', 'claw', 'laser') AND kt.kind = 'movie' AND t.production_year > 2000 AND kt.id = t.kind_id AND t.id = mk.movie_id AND t.id = ci.movie_id AND t.id = cc.movie_id AND t.id = mi_idx.movie_id AND mk.movie_id = ci.movie_id AND mk.movie_id = cc.movie_id AND mk.movie_id = mi_idx.movie_id AND ci.movie_id = cc.movie_id AND 
ci.movie_id = mi_idx.movie_id AND cc.movie_id = mi_idx.movie_id AND chn.id = ci.person_role_id AND n.id = ci.person_id AND k.id = mk.keyword_id AND cct1.id = cc.subject_id AND cct2.id = cc.status_id AND it2.id = mi_idx.info_type_id; 2 | -------------------------------------------------------------------------------- /IMDB-JOB/27a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(cn.name) AS producing_company, MIN(lt.link) AS link_type, MIN(t.title) AS complete_western_sequel FROM complete_cast AS cc, comp_cast_type AS cct1, comp_cast_type AS cct2, company_name AS cn, company_type AS ct, keyword AS k, link_type AS lt, movie_companies AS mc, movie_info AS mi, movie_keyword AS mk, movie_link AS ml, title AS t WHERE cct1.kind in ('cast', 'crew') AND cct2.kind = 'complete' AND cn.country_code !='[pl]' AND (cn.name LIKE '%Film%' OR cn.name LIKE '%Warner%') AND ct.kind ='production companies' AND k.keyword ='sequel' AND lt.link LIKE '%follow%' AND mc.note IS NULL AND mi.info IN ('Sweden', 'Germany','Swedish', 'German') AND t.production_year BETWEEN 1950 AND 2000 AND lt.id = ml.link_type_id AND ml.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND t.id = mc.movie_id AND mc.company_type_id = ct.id AND mc.company_id = cn.id AND mi.movie_id = t.id AND t.id = cc.movie_id AND cct1.id = cc.subject_id AND cct2.id = cc.status_id AND ml.movie_id = mk.movie_id AND ml.movie_id = mc.movie_id AND mk.movie_id = mc.movie_id AND ml.movie_id = mi.movie_id AND mk.movie_id = mi.movie_id AND mc.movie_id = mi.movie_id AND ml.movie_id = cc.movie_id AND mk.movie_id = cc.movie_id AND mc.movie_id = cc.movie_id AND mi.movie_id = cc.movie_id; 2 | -------------------------------------------------------------------------------- /IMDB-JOB/27b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(cn.name) AS producing_company, MIN(lt.link) AS link_type, MIN(t.title) AS 
complete_western_sequel FROM complete_cast AS cc, comp_cast_type AS cct1, comp_cast_type AS cct2, company_name AS cn, company_type AS ct, keyword AS k, link_type AS lt, movie_companies AS mc, movie_info AS mi, movie_keyword AS mk, movie_link AS ml, title AS t WHERE cct1.kind in ('cast', 'crew') AND cct2.kind = 'complete' AND cn.country_code !='[pl]' AND (cn.name LIKE '%Film%' OR cn.name LIKE '%Warner%') AND ct.kind ='production companies' AND k.keyword ='sequel' AND lt.link LIKE '%follow%' AND mc.note IS NULL AND mi.info IN ('Sweden', 'Germany','Swedish', 'German') AND t.production_year = 1998 AND lt.id = ml.link_type_id AND ml.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND t.id = mc.movie_id AND mc.company_type_id = ct.id AND mc.company_id = cn.id AND mi.movie_id = t.id AND t.id = cc.movie_id AND cct1.id = cc.subject_id AND cct2.id = cc.status_id AND ml.movie_id = mk.movie_id AND ml.movie_id = mc.movie_id AND mk.movie_id = mc.movie_id AND ml.movie_id = mi.movie_id AND mk.movie_id = mi.movie_id AND mc.movie_id = mi.movie_id AND ml.movie_id = cc.movie_id AND mk.movie_id = cc.movie_id AND mc.movie_id = cc.movie_id AND mi.movie_id = cc.movie_id; 2 | -------------------------------------------------------------------------------- /IMDB-JOB/27c.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(cn.name) AS producing_company, MIN(lt.link) AS link_type, MIN(t.title) AS complete_western_sequel FROM complete_cast AS cc, comp_cast_type AS cct1, comp_cast_type AS cct2, company_name AS cn, company_type AS ct, keyword AS k, link_type AS lt, movie_companies AS mc, movie_info AS mi, movie_keyword AS mk, movie_link AS ml, title AS t WHERE cct1.kind = 'cast' AND cct2.kind like 'complete%' AND cn.country_code !='[pl]' AND (cn.name LIKE '%Film%' OR cn.name LIKE '%Warner%') AND ct.kind ='production companies' AND k.keyword ='sequel' AND lt.link LIKE '%follow%' AND mc.note IS NULL AND mi.info IN ('Sweden', 'Norway', 
'Germany', 'Denmark', 'Swedish', 'Denish', 'Norwegian', 'German', 'English') AND t.production_year BETWEEN 1950 AND 2010 AND lt.id = ml.link_type_id AND ml.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND t.id = mc.movie_id AND mc.company_type_id = ct.id AND mc.company_id = cn.id AND mi.movie_id = t.id AND t.id = cc.movie_id AND cct1.id = cc.subject_id AND cct2.id = cc.status_id AND ml.movie_id = mk.movie_id AND ml.movie_id = mc.movie_id AND mk.movie_id = mc.movie_id AND ml.movie_id = mi.movie_id AND mk.movie_id = mi.movie_id AND mc.movie_id = mi.movie_id AND ml.movie_id = cc.movie_id AND mk.movie_id = cc.movie_id AND mc.movie_id = cc.movie_id AND mi.movie_id = cc.movie_id; 2 | -------------------------------------------------------------------------------- /IMDB-JOB/28a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(cn.name) AS movie_company, MIN(mi_idx.info) AS rating, MIN(t.title) AS complete_euro_dark_movie FROM complete_cast AS cc, comp_cast_type AS cct1, comp_cast_type AS cct2, company_name AS cn, company_type AS ct, info_type AS it1, info_type AS it2, keyword AS k, kind_type AS kt, movie_companies AS mc, movie_info AS mi, movie_info_idx AS mi_idx, movie_keyword AS mk, title AS t WHERE cct1.kind = 'crew' AND cct2.kind != 'complete+verified' AND cn.country_code != '[us]' AND it1.info = 'countries' AND it2.info = 'rating' AND k.keyword in ('murder', 'murder-in-title', 'blood', 'violence') AND kt.kind in ('movie', 'episode') AND mc.note not like '%(USA)%' and mc.note like '%(200%)%' AND mi.info IN ('Sweden', 'Norway', 'Germany', 'Denmark', 'Swedish', 'Danish', 'Norwegian', 'German', 'USA', 'American') AND mi_idx.info < '8.5' AND t.production_year > 2000 AND kt.id = t.kind_id AND t.id = mi.movie_id AND t.id = mk.movie_id AND t.id = mi_idx.movie_id AND t.id = mc.movie_id AND t.id = cc.movie_id AND mk.movie_id = mi.movie_id AND mk.movie_id = mi_idx.movie_id AND mk.movie_id = mc.movie_id AND 
mk.movie_id = cc.movie_id AND mi.movie_id = mi_idx.movie_id AND mi.movie_id = mc.movie_id AND mi.movie_id = cc.movie_id AND mc.movie_id = mi_idx.movie_id AND mc.movie_id = cc.movie_id AND mi_idx.movie_id = cc.movie_id AND k.id = mk.keyword_id AND it1.id = mi.info_type_id AND it2.id = mi_idx.info_type_id AND ct.id = mc.company_type_id AND cn.id = mc.company_id AND cct1.id = cc.subject_id AND cct2.id = cc.status_id; 2 | -------------------------------------------------------------------------------- /IMDB-JOB/28b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(cn.name) AS movie_company, MIN(mi_idx.info) AS rating, MIN(t.title) AS complete_euro_dark_movie FROM complete_cast AS cc, comp_cast_type AS cct1, comp_cast_type AS cct2, company_name AS cn, company_type AS ct, info_type AS it1, info_type AS it2, keyword AS k, kind_type AS kt, movie_companies AS mc, movie_info AS mi, movie_info_idx AS mi_idx, movie_keyword AS mk, title AS t WHERE cct1.kind = 'crew' AND cct2.kind != 'complete+verified' AND cn.country_code != '[us]' AND it1.info = 'countries' AND it2.info = 'rating' AND k.keyword in ('murder', 'murder-in-title', 'blood', 'violence') AND kt.kind in ('movie', 'episode') AND mc.note not like '%(USA)%' and mc.note like '%(200%)%' AND mi.info IN ('Sweden', 'Germany', 'Swedish', 'German') AND mi_idx.info > '6.5' AND t.production_year > 2005 AND kt.id = t.kind_id AND t.id = mi.movie_id AND t.id = mk.movie_id AND t.id = mi_idx.movie_id AND t.id = mc.movie_id AND t.id = cc.movie_id AND mk.movie_id = mi.movie_id AND mk.movie_id = mi_idx.movie_id AND mk.movie_id = mc.movie_id AND mk.movie_id = cc.movie_id AND mi.movie_id = mi_idx.movie_id AND mi.movie_id = mc.movie_id AND mi.movie_id = cc.movie_id AND mc.movie_id = mi_idx.movie_id AND mc.movie_id = cc.movie_id AND mi_idx.movie_id = cc.movie_id AND k.id = mk.keyword_id AND it1.id = mi.info_type_id AND it2.id = mi_idx.info_type_id AND ct.id = mc.company_type_id AND cn.id = 
mc.company_id AND cct1.id = cc.subject_id AND cct2.id = cc.status_id; 2 | -------------------------------------------------------------------------------- /IMDB-JOB/28c.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(cn.name) AS movie_company, MIN(mi_idx.info) AS rating, MIN(t.title) AS complete_euro_dark_movie FROM complete_cast AS cc, comp_cast_type AS cct1, comp_cast_type AS cct2, company_name AS cn, company_type AS ct, info_type AS it1, info_type AS it2, keyword AS k, kind_type AS kt, movie_companies AS mc, movie_info AS mi, movie_info_idx AS mi_idx, movie_keyword AS mk, title AS t WHERE cct1.kind = 'cast' AND cct2.kind = 'complete' AND cn.country_code != '[us]' AND it1.info = 'countries' AND it2.info = 'rating' AND k.keyword in ('murder', 'murder-in-title', 'blood', 'violence') AND kt.kind in ('movie', 'episode') AND mc.note not like '%(USA)%' and mc.note like '%(200%)%' AND mi.info IN ('Sweden', 'Norway', 'Germany', 'Denmark', 'Swedish', 'Danish', 'Norwegian', 'German', 'USA', 'American') AND mi_idx.info < '8.5' AND t.production_year > 2005 AND kt.id = t.kind_id AND t.id = mi.movie_id AND t.id = mk.movie_id AND t.id = mi_idx.movie_id AND t.id = mc.movie_id AND t.id = cc.movie_id AND mk.movie_id = mi.movie_id AND mk.movie_id = mi_idx.movie_id AND mk.movie_id = mc.movie_id AND mk.movie_id = cc.movie_id AND mi.movie_id = mi_idx.movie_id AND mi.movie_id = mc.movie_id AND mi.movie_id = cc.movie_id AND mc.movie_id = mi_idx.movie_id AND mc.movie_id = cc.movie_id AND mi_idx.movie_id = cc.movie_id AND k.id = mk.keyword_id AND it1.id = mi.info_type_id AND it2.id = mi_idx.info_type_id AND ct.id = mc.company_type_id AND cn.id = mc.company_id AND cct1.id = cc.subject_id AND cct2.id = cc.status_id; 2 | -------------------------------------------------------------------------------- /IMDB-JOB/29a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(chn.name) AS voiced_char, 
MIN(n.name) AS voicing_actress, MIN(t.title) AS voiced_animation FROM aka_name AS an, complete_cast AS cc, comp_cast_type AS cct1, comp_cast_type AS cct2, char_name AS chn, cast_info AS ci, company_name AS cn, info_type AS it, info_type AS it3, keyword AS k, movie_companies AS mc, movie_info AS mi, movie_keyword AS mk, name AS n, person_info AS pi, role_type AS rt, title AS t WHERE cct1.kind ='cast' AND cct2.kind ='complete+verified' AND chn.name = 'Queen' AND ci.note in ('(voice)', '(voice) (uncredited)', '(voice: English version)') AND cn.country_code ='[us]' AND it.info = 'release dates' AND it3.info = 'trivia' AND k.keyword = 'computer-animation' AND mi.info is not null and (mi.info like 'Japan:%200%' or mi.info like 'USA:%200%') AND n.gender ='f' and n.name like '%An%' AND rt.role ='actress' AND t.title = 'Shrek 2' AND t.production_year between 2000 and 2010 AND t.id = mi.movie_id AND t.id = mc.movie_id AND t.id = ci.movie_id AND t.id = mk.movie_id AND t.id = cc.movie_id AND mc.movie_id = ci.movie_id AND mc.movie_id = mi.movie_id AND mc.movie_id = mk.movie_id AND mc.movie_id = cc.movie_id AND mi.movie_id = ci.movie_id AND mi.movie_id = mk.movie_id AND mi.movie_id = cc.movie_id AND ci.movie_id = mk.movie_id AND ci.movie_id = cc.movie_id AND mk.movie_id = cc.movie_id AND cn.id = mc.company_id AND it.id = mi.info_type_id AND n.id = ci.person_id AND rt.id = ci.role_id AND n.id = an.person_id AND ci.person_id = an.person_id AND chn.id = ci.person_role_id AND n.id = pi.person_id AND ci.person_id = pi.person_id AND it3.id = pi.info_type_id AND k.id = mk.keyword_id AND cct1.id = cc.subject_id AND cct2.id = cc.status_id; 2 | -------------------------------------------------------------------------------- /IMDB-JOB/29b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(chn.name) AS voiced_char, MIN(n.name) AS voicing_actress, MIN(t.title) AS voiced_animation FROM aka_name AS an, complete_cast AS cc, comp_cast_type AS 
cct1, comp_cast_type AS cct2, char_name AS chn, cast_info AS ci, company_name AS cn, info_type AS it, info_type AS it3, keyword AS k, movie_companies AS mc, movie_info AS mi, movie_keyword AS mk, name AS n, person_info AS pi, role_type AS rt, title AS t WHERE cct1.kind ='cast' AND cct2.kind ='complete+verified' AND chn.name = 'Queen' AND ci.note in ('(voice)', '(voice) (uncredited)', '(voice: English version)') AND cn.country_code ='[us]' AND it.info = 'release dates' AND it3.info = 'height' AND k.keyword = 'computer-animation' AND mi.info like 'USA:%200%' AND n.gender ='f' and n.name like '%An%' AND rt.role ='actress' AND t.title = 'Shrek 2' AND t.production_year between 2000 and 2005 AND t.id = mi.movie_id AND t.id = mc.movie_id AND t.id = ci.movie_id AND t.id = mk.movie_id AND t.id = cc.movie_id AND mc.movie_id = ci.movie_id AND mc.movie_id = mi.movie_id AND mc.movie_id = mk.movie_id AND mc.movie_id = cc.movie_id AND mi.movie_id = ci.movie_id AND mi.movie_id = mk.movie_id AND mi.movie_id = cc.movie_id AND ci.movie_id = mk.movie_id AND ci.movie_id = cc.movie_id AND mk.movie_id = cc.movie_id AND cn.id = mc.company_id AND it.id = mi.info_type_id AND n.id = ci.person_id AND rt.id = ci.role_id AND n.id = an.person_id AND ci.person_id = an.person_id AND chn.id = ci.person_role_id AND n.id = pi.person_id AND ci.person_id = pi.person_id AND it3.id = pi.info_type_id AND k.id = mk.keyword_id AND cct1.id = cc.subject_id AND cct2.id = cc.status_id; 2 | -------------------------------------------------------------------------------- /IMDB-JOB/29c.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(chn.name) AS voiced_char, MIN(n.name) AS voicing_actress, MIN(t.title) AS voiced_animation FROM aka_name AS an, complete_cast AS cc, comp_cast_type AS cct1, comp_cast_type AS cct2, char_name AS chn, cast_info AS ci, company_name AS cn, info_type AS it, info_type AS it3, keyword AS k, movie_companies AS mc, movie_info AS mi, 
movie_keyword AS mk, name AS n, person_info AS pi, role_type AS rt, title AS t WHERE cct1.kind ='cast' AND cct2.kind ='complete+verified' AND ci.note in ('(voice)', '(voice: Japanese version)', '(voice) (uncredited)', '(voice: English version)') AND cn.country_code ='[us]' AND it.info = 'release dates' AND it3.info = 'trivia' AND k.keyword = 'computer-animation' AND mi.info is not null and (mi.info like 'Japan:%200%' or mi.info like 'USA:%200%') AND n.gender ='f' and n.name like '%An%' AND rt.role ='actress' AND t.production_year between 2000 and 2010 AND t.id = mi.movie_id AND t.id = mc.movie_id AND t.id = ci.movie_id AND t.id = mk.movie_id AND t.id = cc.movie_id AND mc.movie_id = ci.movie_id AND mc.movie_id = mi.movie_id AND mc.movie_id = mk.movie_id AND mc.movie_id = cc.movie_id AND mi.movie_id = ci.movie_id AND mi.movie_id = mk.movie_id AND mi.movie_id = cc.movie_id AND ci.movie_id = mk.movie_id AND ci.movie_id = cc.movie_id AND mk.movie_id = cc.movie_id AND cn.id = mc.company_id AND it.id = mi.info_type_id AND n.id = ci.person_id AND rt.id = ci.role_id AND n.id = an.person_id AND ci.person_id = an.person_id AND chn.id = ci.person_role_id AND n.id = pi.person_id AND ci.person_id = pi.person_id AND it3.id = pi.info_type_id AND k.id = mk.keyword_id AND cct1.id = cc.subject_id AND cct2.id = cc.status_id; 2 | -------------------------------------------------------------------------------- /IMDB-JOB/2a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(t.title) AS movie_title FROM company_name AS cn, keyword AS k, movie_companies AS mc, movie_keyword AS mk, title AS t WHERE cn.country_code ='[de]' AND k.keyword ='character-name-in-title' AND cn.id = mc.company_id AND mc.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND mc.movie_id = mk.movie_id; 2 | -------------------------------------------------------------------------------- /IMDB-JOB/2b.sql: 
-------------------------------------------------------------------------------- 1 | SELECT MIN(t.title) AS movie_title FROM company_name AS cn, keyword AS k, movie_companies AS mc, movie_keyword AS mk, title AS t WHERE cn.country_code ='[nl]' AND k.keyword ='character-name-in-title' AND cn.id = mc.company_id AND mc.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND mc.movie_id = mk.movie_id; 2 | -------------------------------------------------------------------------------- /IMDB-JOB/2c.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(t.title) AS movie_title FROM company_name AS cn, keyword AS k, movie_companies AS mc, movie_keyword AS mk, title AS t WHERE cn.country_code ='[sm]' AND k.keyword ='character-name-in-title' AND cn.id = mc.company_id AND mc.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND mc.movie_id = mk.movie_id; 2 | -------------------------------------------------------------------------------- /IMDB-JOB/2d.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(t.title) AS movie_title FROM company_name AS cn, keyword AS k, movie_companies AS mc, movie_keyword AS mk, title AS t WHERE cn.country_code ='[us]' AND k.keyword ='character-name-in-title' AND cn.id = mc.company_id AND mc.movie_id = t.id AND t.id = mk.movie_id AND mk.keyword_id = k.id AND mc.movie_id = mk.movie_id; 2 | -------------------------------------------------------------------------------- /IMDB-JOB/30a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(mi.info) AS movie_budget, MIN(mi_idx.info) AS movie_votes, MIN(n.name) AS writer, MIN(t.title) AS complete_violent_movie FROM complete_cast AS cc, comp_cast_type AS cct1, comp_cast_type AS cct2, cast_info AS ci, info_type AS it1, info_type AS it2, keyword AS k, movie_info AS mi, movie_info_idx AS mi_idx, movie_keyword AS mk, name AS n, title AS t WHERE 
cct1.kind in ('cast', 'crew') AND cct2.kind ='complete+verified' AND ci.note in ('(writer)', '(head writer)', '(written by)', '(story)', '(story editor)') AND it1.info = 'genres' AND it2.info = 'votes' AND k.keyword in ('murder', 'violence', 'blood', 'gore', 'death', 'female-nudity', 'hospital') AND mi.info in ('Horror', 'Thriller') AND n.gender = 'm' AND t.production_year > 2000 AND t.id = mi.movie_id AND t.id = mi_idx.movie_id AND t.id = ci.movie_id AND t.id = mk.movie_id AND t.id = cc.movie_id AND ci.movie_id = mi.movie_id AND ci.movie_id = mi_idx.movie_id AND ci.movie_id = mk.movie_id AND ci.movie_id = cc.movie_id AND mi.movie_id = mi_idx.movie_id AND mi.movie_id = mk.movie_id AND mi.movie_id = cc.movie_id AND mi_idx.movie_id = mk.movie_id AND mi_idx.movie_id = cc.movie_id AND mk.movie_id = cc.movie_id AND n.id = ci.person_id AND it1.id = mi.info_type_id AND it2.id = mi_idx.info_type_id AND k.id = mk.keyword_id AND cct1.id = cc.subject_id AND cct2.id = cc.status_id; 2 | -------------------------------------------------------------------------------- /IMDB-JOB/30b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(mi.info) AS movie_budget, MIN(mi_idx.info) AS movie_votes, MIN(n.name) AS writer, MIN(t.title) AS complete_gore_movie FROM complete_cast AS cc, comp_cast_type AS cct1, comp_cast_type AS cct2, cast_info AS ci, info_type AS it1, info_type AS it2, keyword AS k, movie_info AS mi, movie_info_idx AS mi_idx, movie_keyword AS mk, name AS n, title AS t WHERE cct1.kind in ('cast', 'crew') AND cct2.kind ='complete+verified' AND ci.note in ('(writer)', '(head writer)', '(written by)', '(story)', '(story editor)') AND it1.info = 'genres' AND it2.info = 'votes' AND k.keyword in ('murder', 'violence', 'blood', 'gore', 'death', 'female-nudity', 'hospital') AND mi.info in ('Horror', 'Thriller') AND n.gender = 'm' AND t.production_year > 2000 and (t.title like '%Freddy%' or t.title like '%Jason%' or t.title like 'Saw%') 
AND t.id = mi.movie_id AND t.id = mi_idx.movie_id AND t.id = ci.movie_id AND t.id = mk.movie_id AND t.id = cc.movie_id AND ci.movie_id = mi.movie_id AND ci.movie_id = mi_idx.movie_id AND ci.movie_id = mk.movie_id AND ci.movie_id = cc.movie_id AND mi.movie_id = mi_idx.movie_id AND mi.movie_id = mk.movie_id AND mi.movie_id = cc.movie_id AND mi_idx.movie_id = mk.movie_id AND mi_idx.movie_id = cc.movie_id AND mk.movie_id = cc.movie_id AND n.id = ci.person_id AND it1.id = mi.info_type_id AND it2.id = mi_idx.info_type_id AND k.id = mk.keyword_id AND cct1.id = cc.subject_id AND cct2.id = cc.status_id; 2 | -------------------------------------------------------------------------------- /IMDB-JOB/30c.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(mi.info) AS movie_budget, MIN(mi_idx.info) AS movie_votes, MIN(n.name) AS writer, MIN(t.title) AS complete_violent_movie FROM complete_cast AS cc, comp_cast_type AS cct1, comp_cast_type AS cct2, cast_info AS ci, info_type AS it1, info_type AS it2, keyword AS k, movie_info AS mi, movie_info_idx AS mi_idx, movie_keyword AS mk, name AS n, title AS t WHERE cct1.kind = 'cast' AND cct2.kind ='complete+verified' AND ci.note in ('(writer)', '(head writer)', '(written by)', '(story)', '(story editor)') AND it1.info = 'genres' AND it2.info = 'votes' AND k.keyword in ('murder', 'violence', 'blood', 'gore', 'death', 'female-nudity', 'hospital') AND mi.info in ('Horror', 'Action', 'Sci-Fi', 'Thriller', 'Crime', 'War') AND n.gender = 'm' AND t.id = mi.movie_id AND t.id = mi_idx.movie_id AND t.id = ci.movie_id AND t.id = mk.movie_id AND t.id = cc.movie_id AND ci.movie_id = mi.movie_id AND ci.movie_id = mi_idx.movie_id AND ci.movie_id = mk.movie_id AND ci.movie_id = cc.movie_id AND mi.movie_id = mi_idx.movie_id AND mi.movie_id = mk.movie_id AND mi.movie_id = cc.movie_id AND mi_idx.movie_id = mk.movie_id AND mi_idx.movie_id = cc.movie_id AND mk.movie_id = cc.movie_id AND n.id = ci.person_id AND 
it1.id = mi.info_type_id AND it2.id = mi_idx.info_type_id AND k.id = mk.keyword_id AND cct1.id = cc.subject_id AND cct2.id = cc.status_id; 2 | -------------------------------------------------------------------------------- /IMDB-JOB/31a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(mi.info) AS movie_budget, MIN(mi_idx.info) AS movie_votes, MIN(n.name) AS writer, MIN(t.title) AS violent_liongate_movie FROM cast_info AS ci, company_name AS cn, info_type AS it1, info_type AS it2, keyword AS k, movie_companies AS mc, movie_info AS mi, movie_info_idx AS mi_idx, movie_keyword AS mk, name AS n, title AS t WHERE ci.note in ('(writer)', '(head writer)', '(written by)', '(story)', '(story editor)') AND cn.name like 'Lionsgate%' AND it1.info = 'genres' AND it2.info = 'votes' AND k.keyword in ('murder', 'violence', 'blood', 'gore', 'death', 'female-nudity', 'hospital') AND mi.info in ('Horror', 'Thriller') AND n.gender = 'm' AND t.id = mi.movie_id AND t.id = mi_idx.movie_id AND t.id = ci.movie_id AND t.id = mk.movie_id AND t.id = mc.movie_id AND ci.movie_id = mi.movie_id AND ci.movie_id = mi_idx.movie_id AND ci.movie_id = mk.movie_id AND ci.movie_id = mc.movie_id AND mi.movie_id = mi_idx.movie_id AND mi.movie_id = mk.movie_id AND mi.movie_id = mc.movie_id AND mi_idx.movie_id = mk.movie_id AND mi_idx.movie_id = mc.movie_id AND mk.movie_id = mc.movie_id AND n.id = ci.person_id AND it1.id = mi.info_type_id AND it2.id = mi_idx.info_type_id AND k.id = mk.keyword_id AND cn.id = mc.company_id; 2 | -------------------------------------------------------------------------------- /IMDB-JOB/31b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(mi.info) AS movie_budget, MIN(mi_idx.info) AS movie_votes, MIN(n.name) AS writer, MIN(t.title) AS violent_liongate_movie FROM cast_info AS ci, company_name AS cn, info_type AS it1, info_type AS it2, keyword AS k, movie_companies AS mc, 
movie_info AS mi, movie_info_idx AS mi_idx, movie_keyword AS mk, name AS n, title AS t WHERE ci.note in ('(writer)', '(head writer)', '(written by)', '(story)', '(story editor)') AND cn.name like 'Lionsgate%' AND it1.info = 'genres' AND it2.info = 'votes' AND k.keyword in ('murder', 'violence', 'blood', 'gore', 'death', 'female-nudity', 'hospital') AND mc.note like '%(Blu-ray)%' AND mi.info in ('Horror', 'Thriller') AND n.gender = 'm' AND t.production_year > 2000 and (t.title like '%Freddy%' or t.title like '%Jason%' or t.title like 'Saw%') AND t.id = mi.movie_id AND t.id = mi_idx.movie_id AND t.id = ci.movie_id AND t.id = mk.movie_id AND t.id = mc.movie_id AND ci.movie_id = mi.movie_id AND ci.movie_id = mi_idx.movie_id AND ci.movie_id = mk.movie_id AND ci.movie_id = mc.movie_id AND mi.movie_id = mi_idx.movie_id AND mi.movie_id = mk.movie_id AND mi.movie_id = mc.movie_id AND mi_idx.movie_id = mk.movie_id AND mi_idx.movie_id = mc.movie_id AND mk.movie_id = mc.movie_id AND n.id = ci.person_id AND it1.id = mi.info_type_id AND it2.id = mi_idx.info_type_id AND k.id = mk.keyword_id AND cn.id = mc.company_id; 2 | -------------------------------------------------------------------------------- /IMDB-JOB/31c.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(mi.info) AS movie_budget, MIN(mi_idx.info) AS movie_votes, MIN(n.name) AS writer, MIN(t.title) AS violent_liongate_movie FROM cast_info AS ci, company_name AS cn, info_type AS it1, info_type AS it2, keyword AS k, movie_companies AS mc, movie_info AS mi, movie_info_idx AS mi_idx, movie_keyword AS mk, name AS n, title AS t WHERE ci.note in ('(writer)', '(head writer)', '(written by)', '(story)', '(story editor)') AND cn.name like 'Lionsgate%' AND it1.info = 'genres' AND it2.info = 'votes' AND k.keyword in ('murder', 'violence', 'blood', 'gore', 'death', 'female-nudity', 'hospital') AND mi.info in ('Horror', 'Action', 'Sci-Fi', 'Thriller', 'Crime', 'War') AND t.id = 
mi.movie_id AND t.id = mi_idx.movie_id AND t.id = ci.movie_id AND t.id = mk.movie_id AND t.id = mc.movie_id AND ci.movie_id = mi.movie_id AND ci.movie_id = mi_idx.movie_id AND ci.movie_id = mk.movie_id AND ci.movie_id = mc.movie_id AND mi.movie_id = mi_idx.movie_id AND mi.movie_id = mk.movie_id AND mi.movie_id = mc.movie_id AND mi_idx.movie_id = mk.movie_id AND mi_idx.movie_id = mc.movie_id AND mk.movie_id = mc.movie_id AND n.id = ci.person_id AND it1.id = mi.info_type_id AND it2.id = mi_idx.info_type_id AND k.id = mk.keyword_id AND cn.id = mc.company_id; 2 | -------------------------------------------------------------------------------- /IMDB-JOB/32a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(lt.link) AS link_type, MIN(t1.title) AS first_movie, MIN(t2.title) AS second_movie FROM keyword AS k, link_type AS lt, movie_keyword AS mk, movie_link AS ml, title AS t1, title AS t2 WHERE k.keyword ='10,000-mile-club' AND mk.keyword_id = k.id AND t1.id = mk.movie_id AND ml.movie_id = t1.id AND ml.linked_movie_id = t2.id AND lt.id = ml.link_type_id AND mk.movie_id = t1.id; 2 | -------------------------------------------------------------------------------- /IMDB-JOB/32b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(lt.link) AS link_type, MIN(t1.title) AS first_movie, MIN(t2.title) AS second_movie FROM keyword AS k, link_type AS lt, movie_keyword AS mk, movie_link AS ml, title AS t1, title AS t2 WHERE k.keyword ='character-name-in-title' AND mk.keyword_id = k.id AND t1.id = mk.movie_id AND ml.movie_id = t1.id AND ml.linked_movie_id = t2.id AND lt.id = ml.link_type_id AND mk.movie_id = t1.id; 2 | -------------------------------------------------------------------------------- /IMDB-JOB/33a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(cn1.name) AS first_company, MIN(cn2.name) AS second_company, MIN(mi_idx1.info) 
AS first_rating, MIN(mi_idx2.info) AS second_rating, MIN(t1.title) AS first_movie, MIN(t2.title) AS second_movie FROM company_name AS cn1, company_name AS cn2, info_type AS it1, info_type AS it2, kind_type AS kt1, kind_type AS kt2, link_type AS lt, movie_companies AS mc1, movie_companies AS mc2, movie_info_idx AS mi_idx1, movie_info_idx AS mi_idx2, movie_link AS ml, title AS t1, title AS t2 WHERE cn1.country_code = '[us]' AND it1.info = 'rating' AND it2.info = 'rating' AND kt1.kind in ('tv series') AND kt2.kind in ('tv series') AND lt.link in ('sequel', 'follows', 'followed by') AND mi_idx2.info < '3.0' AND t2.production_year between 2005 and 2008 AND lt.id = ml.link_type_id AND t1.id = ml.movie_id AND t2.id = ml.linked_movie_id AND it1.id = mi_idx1.info_type_id AND t1.id = mi_idx1.movie_id AND kt1.id = t1.kind_id AND cn1.id = mc1.company_id AND t1.id = mc1.movie_id AND ml.movie_id = mi_idx1.movie_id AND ml.movie_id = mc1.movie_id AND mi_idx1.movie_id = mc1.movie_id AND it2.id = mi_idx2.info_type_id AND t2.id = mi_idx2.movie_id AND kt2.id = t2.kind_id AND cn2.id = mc2.company_id AND t2.id = mc2.movie_id AND ml.linked_movie_id = mi_idx2.movie_id AND ml.linked_movie_id = mc2.movie_id AND mi_idx2.movie_id = mc2.movie_id; 2 | -------------------------------------------------------------------------------- /IMDB-JOB/33b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(cn1.name) AS first_company, MIN(cn2.name) AS second_company, MIN(mi_idx1.info) AS first_rating, MIN(mi_idx2.info) AS second_rating, MIN(t1.title) AS first_movie, MIN(t2.title) AS second_movie FROM company_name AS cn1, company_name AS cn2, info_type AS it1, info_type AS it2, kind_type AS kt1, kind_type AS kt2, link_type AS lt, movie_companies AS mc1, movie_companies AS mc2, movie_info_idx AS mi_idx1, movie_info_idx AS mi_idx2, movie_link AS ml, title AS t1, title AS t2 WHERE cn1.country_code = '[nl]' AND it1.info = 'rating' AND it2.info = 'rating' AND 
kt1.kind in ('tv series') AND kt2.kind in ('tv series') AND lt.link LIKE '%follow%' AND mi_idx2.info < '3.0' AND t2.production_year = 2007 AND lt.id = ml.link_type_id AND t1.id = ml.movie_id AND t2.id = ml.linked_movie_id AND it1.id = mi_idx1.info_type_id AND t1.id = mi_idx1.movie_id AND kt1.id = t1.kind_id AND cn1.id = mc1.company_id AND t1.id = mc1.movie_id AND ml.movie_id = mi_idx1.movie_id AND ml.movie_id = mc1.movie_id AND mi_idx1.movie_id = mc1.movie_id AND it2.id = mi_idx2.info_type_id AND t2.id = mi_idx2.movie_id AND kt2.id = t2.kind_id AND cn2.id = mc2.company_id AND t2.id = mc2.movie_id AND ml.linked_movie_id = mi_idx2.movie_id AND ml.linked_movie_id = mc2.movie_id AND mi_idx2.movie_id = mc2.movie_id; 2 | -------------------------------------------------------------------------------- /IMDB-JOB/33c.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(cn1.name) AS first_company, MIN(cn2.name) AS second_company, MIN(mi_idx1.info) AS first_rating, MIN(mi_idx2.info) AS second_rating, MIN(t1.title) AS first_movie, MIN(t2.title) AS second_movie FROM company_name AS cn1, company_name AS cn2, info_type AS it1, info_type AS it2, kind_type AS kt1, kind_type AS kt2, link_type AS lt, movie_companies AS mc1, movie_companies AS mc2, movie_info_idx AS mi_idx1, movie_info_idx AS mi_idx2, movie_link AS ml, title AS t1, title AS t2 WHERE cn1.country_code != '[us]' AND it1.info = 'rating' AND it2.info = 'rating' AND kt1.kind in ('tv series', 'episode') AND kt2.kind in ('tv series', 'episode') AND lt.link in ('sequel', 'follows', 'followed by') AND mi_idx2.info < '3.5' AND t2.production_year between 2000 and 2010 AND lt.id = ml.link_type_id AND t1.id = ml.movie_id AND t2.id = ml.linked_movie_id AND it1.id = mi_idx1.info_type_id AND t1.id = mi_idx1.movie_id AND kt1.id = t1.kind_id AND cn1.id = mc1.company_id AND t1.id = mc1.movie_id AND ml.movie_id = mi_idx1.movie_id AND ml.movie_id = mc1.movie_id AND mi_idx1.movie_id = 
mc1.movie_id AND it2.id = mi_idx2.info_type_id AND t2.id = mi_idx2.movie_id AND kt2.id = t2.kind_id AND cn2.id = mc2.company_id AND t2.id = mc2.movie_id AND ml.linked_movie_id = mi_idx2.movie_id AND ml.linked_movie_id = mc2.movie_id AND mi_idx2.movie_id = mc2.movie_id; 2 | -------------------------------------------------------------------------------- /IMDB-JOB/3a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(t.title) AS movie_title FROM keyword AS k, movie_info AS mi, movie_keyword AS mk, title AS t WHERE k.keyword like '%sequel%' AND mi.info IN ('Sweden', 'Norway', 'Germany', 'Denmark', 'Swedish', 'Denish', 'Norwegian', 'German') AND t.production_year > 2005 AND t.id = mi.movie_id AND t.id = mk.movie_id AND mk.movie_id = mi.movie_id AND k.id = mk.keyword_id; 2 | -------------------------------------------------------------------------------- /IMDB-JOB/3b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(t.title) AS movie_title FROM keyword AS k, movie_info AS mi, movie_keyword AS mk, title AS t WHERE k.keyword like '%sequel%' AND mi.info IN ('Bulgaria') AND t.production_year > 2010 AND t.id = mi.movie_id AND t.id = mk.movie_id AND mk.movie_id = mi.movie_id AND k.id = mk.keyword_id; 2 | -------------------------------------------------------------------------------- /IMDB-JOB/3c.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(t.title) AS movie_title FROM keyword AS k, movie_info AS mi, movie_keyword AS mk, title AS t WHERE k.keyword like '%sequel%' AND mi.info IN ('Sweden', 'Norway', 'Germany', 'Denmark', 'Swedish', 'Denish', 'Norwegian', 'German', 'USA', 'American') AND t.production_year > 1990 AND t.id = mi.movie_id AND t.id = mk.movie_id AND mk.movie_id = mi.movie_id AND k.id = mk.keyword_id; 2 | -------------------------------------------------------------------------------- /IMDB-JOB/4a.sql: 
-------------------------------------------------------------------------------- 1 | SELECT MIN(mi_idx.info) AS rating, MIN(t.title) AS movie_title FROM info_type AS it, keyword AS k, movie_info_idx AS mi_idx, movie_keyword AS mk, title AS t WHERE it.info ='rating' AND k.keyword like '%sequel%' AND mi_idx.info > '5.0' AND t.production_year > 2005 AND t.id = mi_idx.movie_id AND t.id = mk.movie_id AND mk.movie_id = mi_idx.movie_id AND k.id = mk.keyword_id AND it.id = mi_idx.info_type_id; 2 | -------------------------------------------------------------------------------- /IMDB-JOB/4b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(mi_idx.info) AS rating, MIN(t.title) AS movie_title FROM info_type AS it, keyword AS k, movie_info_idx AS mi_idx, movie_keyword AS mk, title AS t WHERE it.info ='rating' AND k.keyword like '%sequel%' AND mi_idx.info > '9.0' AND t.production_year > 2010 AND t.id = mi_idx.movie_id AND t.id = mk.movie_id AND mk.movie_id = mi_idx.movie_id AND k.id = mk.keyword_id AND it.id = mi_idx.info_type_id; 2 | -------------------------------------------------------------------------------- /IMDB-JOB/4c.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(mi_idx.info) AS rating, MIN(t.title) AS movie_title FROM info_type AS it, keyword AS k, movie_info_idx AS mi_idx, movie_keyword AS mk, title AS t WHERE it.info ='rating' AND k.keyword like '%sequel%' AND mi_idx.info > '2.0' AND t.production_year > 1990 AND t.id = mi_idx.movie_id AND t.id = mk.movie_id AND mk.movie_id = mi_idx.movie_id AND k.id = mk.keyword_id AND it.id = mi_idx.info_type_id; 2 | -------------------------------------------------------------------------------- /IMDB-JOB/5a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(t.title) AS typical_european_movie FROM company_type AS ct, info_type AS it, movie_companies AS mc, movie_info AS mi, 
title AS t WHERE ct.kind = 'production companies' AND mc.note like '%(theatrical)%' and mc.note like '%(France)%' AND mi.info IN ('Sweden', 'Norway', 'Germany', 'Denmark', 'Swedish', 'Denish', 'Norwegian', 'German') AND t.production_year > 2005 AND t.id = mi.movie_id AND t.id = mc.movie_id AND mc.movie_id = mi.movie_id AND ct.id = mc.company_type_id AND it.id = mi.info_type_id; 2 | -------------------------------------------------------------------------------- /IMDB-JOB/5b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(t.title) AS american_vhs_movie FROM company_type AS ct, info_type AS it, movie_companies AS mc, movie_info AS mi, title AS t WHERE ct.kind = 'production companies' AND mc.note like '%(VHS)%' and mc.note like '%(USA)%' and mc.note like '%(1994)%' AND mi.info IN ('USA', 'America') AND t.production_year > 2010 AND t.id = mi.movie_id AND t.id = mc.movie_id AND mc.movie_id = mi.movie_id AND ct.id = mc.company_type_id AND it.id = mi.info_type_id; 2 | -------------------------------------------------------------------------------- /IMDB-JOB/5c.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(t.title) AS american_movie FROM company_type AS ct, info_type AS it, movie_companies AS mc, movie_info AS mi, title AS t WHERE ct.kind = 'production companies' AND mc.note not like '%(TV)%' and mc.note like '%(USA)%' AND mi.info IN ('Sweden', 'Norway', 'Germany', 'Denmark', 'Swedish', 'Denish', 'Norwegian', 'German', 'USA', 'American') AND t.production_year > 1990 AND t.id = mi.movie_id AND t.id = mc.movie_id AND mc.movie_id = mi.movie_id AND ct.id = mc.company_type_id AND it.id = mi.info_type_id; 2 | -------------------------------------------------------------------------------- /IMDB-JOB/6a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(k.keyword) AS movie_keyword, MIN(n.name) AS actor_name, MIN(t.title) AS 
marvel_movie FROM cast_info AS ci, keyword AS k, movie_keyword AS mk, name AS n, title AS t WHERE k.keyword = 'marvel-cinematic-universe' AND n.name LIKE '%Downey%Robert%' AND t.production_year > 2010 AND k.id = mk.keyword_id AND t.id = mk.movie_id AND t.id = ci.movie_id AND ci.movie_id = mk.movie_id AND n.id = ci.person_id; 2 | -------------------------------------------------------------------------------- /IMDB-JOB/6b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(k.keyword) AS movie_keyword, MIN(n.name) AS actor_name, MIN(t.title) AS hero_movie FROM cast_info AS ci, keyword AS k, movie_keyword AS mk, name AS n, title AS t WHERE k.keyword in ('superhero', 'sequel', 'second-part', 'marvel-comics', 'based-on-comic', 'tv-special', 'fight', 'violence') AND n.name LIKE '%Downey%Robert%' AND t.production_year > 2014 AND k.id = mk.keyword_id AND t.id = mk.movie_id AND t.id = ci.movie_id AND ci.movie_id = mk.movie_id AND n.id = ci.person_id; 2 | -------------------------------------------------------------------------------- /IMDB-JOB/6c.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(k.keyword) AS movie_keyword, MIN(n.name) AS actor_name, MIN(t.title) AS marvel_movie FROM cast_info AS ci, keyword AS k, movie_keyword AS mk, name AS n, title AS t WHERE k.keyword = 'marvel-cinematic-universe' AND n.name LIKE '%Downey%Robert%' AND t.production_year > 2014 AND k.id = mk.keyword_id AND t.id = mk.movie_id AND t.id = ci.movie_id AND ci.movie_id = mk.movie_id AND n.id = ci.person_id; 2 | -------------------------------------------------------------------------------- /IMDB-JOB/6d.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(k.keyword) AS movie_keyword, MIN(n.name) AS actor_name, MIN(t.title) AS hero_movie FROM cast_info AS ci, keyword AS k, movie_keyword AS mk, name AS n, title AS t WHERE k.keyword in ('superhero', 
'sequel', 'second-part', 'marvel-comics', 'based-on-comic', 'tv-special', 'fight', 'violence') AND n.name LIKE '%Downey%Robert%' AND t.production_year > 2000 AND k.id = mk.keyword_id AND t.id = mk.movie_id AND t.id = ci.movie_id AND ci.movie_id = mk.movie_id AND n.id = ci.person_id; 2 | -------------------------------------------------------------------------------- /IMDB-JOB/6e.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(k.keyword) AS movie_keyword, MIN(n.name) AS actor_name, MIN(t.title) AS marvel_movie FROM cast_info AS ci, keyword AS k, movie_keyword AS mk, name AS n, title AS t WHERE k.keyword = 'marvel-cinematic-universe' AND n.name LIKE '%Downey%Robert%' AND t.production_year > 2000 AND k.id = mk.keyword_id AND t.id = mk.movie_id AND t.id = ci.movie_id AND ci.movie_id = mk.movie_id AND n.id = ci.person_id; 2 | -------------------------------------------------------------------------------- /IMDB-JOB/6f.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(k.keyword) AS movie_keyword, MIN(n.name) AS actor_name, MIN(t.title) AS hero_movie FROM cast_info AS ci, keyword AS k, movie_keyword AS mk, name AS n, title AS t WHERE k.keyword in ('superhero', 'sequel', 'second-part', 'marvel-comics', 'based-on-comic', 'tv-special', 'fight', 'violence') AND t.production_year > 2000 AND k.id = mk.keyword_id AND t.id = mk.movie_id AND t.id = ci.movie_id AND ci.movie_id = mk.movie_id AND n.id = ci.person_id; 2 | -------------------------------------------------------------------------------- /IMDB-JOB/7a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(n.name) AS of_person, MIN(t.title) AS biography_movie FROM aka_name AS an, cast_info AS ci, info_type AS it, link_type AS lt, movie_link AS ml, name AS n, person_info AS pi, title AS t WHERE an.name LIKE '%a%' AND it.info ='mini biography' AND lt.link ='features' AND 
n.name_pcode_cf BETWEEN 'A' AND 'F' AND (n.gender='m' OR (n.gender = 'f' AND n.name LIKE 'B%')) AND pi.note ='Volker Boehm' AND t.production_year BETWEEN 1980 AND 1995 AND n.id = an.person_id AND n.id = pi.person_id AND ci.person_id = n.id AND t.id = ci.movie_id AND ml.linked_movie_id = t.id AND lt.id = ml.link_type_id AND it.id = pi.info_type_id AND pi.person_id = an.person_id AND pi.person_id = ci.person_id AND an.person_id = ci.person_id AND ci.movie_id = ml.linked_movie_id; 2 | -------------------------------------------------------------------------------- /IMDB-JOB/7b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(n.name) AS of_person, MIN(t.title) AS biography_movie FROM aka_name AS an, cast_info AS ci, info_type AS it, link_type AS lt, movie_link AS ml, name AS n, person_info AS pi, title AS t WHERE an.name LIKE '%a%' AND it.info ='mini biography' AND lt.link ='features' AND n.name_pcode_cf LIKE 'D%' AND n.gender='m' AND pi.note ='Volker Boehm' AND t.production_year BETWEEN 1980 AND 1984 AND n.id = an.person_id AND n.id = pi.person_id AND ci.person_id = n.id AND t.id = ci.movie_id AND ml.linked_movie_id = t.id AND lt.id = ml.link_type_id AND it.id = pi.info_type_id AND pi.person_id = an.person_id AND pi.person_id = ci.person_id AND an.person_id = ci.person_id AND ci.movie_id = ml.linked_movie_id; 2 | -------------------------------------------------------------------------------- /IMDB-JOB/7c.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(n.name) AS cast_member_name, MIN(pi.info) AS cast_member_info FROM aka_name AS an, cast_info AS ci, info_type AS it, link_type AS lt, movie_link AS ml, name AS n, person_info AS pi, title AS t WHERE an.name is not NULL and (an.name LIKE '%a%' or an.name LIKE 'A%') AND it.info ='mini biography' AND lt.link in ('references', 'referenced in', 'features', 'featured in') AND n.name_pcode_cf BETWEEN 'A' AND 'F' AND 
(n.gender='m' OR (n.gender = 'f' AND n.name LIKE 'A%')) AND pi.note is not NULL AND t.production_year BETWEEN 1980 AND 2010 AND n.id = an.person_id AND n.id = pi.person_id AND ci.person_id = n.id AND t.id = ci.movie_id AND ml.linked_movie_id = t.id AND lt.id = ml.link_type_id AND it.id = pi.info_type_id AND pi.person_id = an.person_id AND pi.person_id = ci.person_id AND an.person_id = ci.person_id AND ci.movie_id = ml.linked_movie_id; 2 | -------------------------------------------------------------------------------- /IMDB-JOB/8a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(an1.name) AS actress_pseudonym, MIN(t.title) AS japanese_movie_dubbed FROM aka_name AS an1, cast_info AS ci, company_name AS cn, movie_companies AS mc, name AS n1, role_type AS rt, title AS t WHERE ci.note ='(voice: English version)' AND cn.country_code ='[jp]' AND mc.note like '%(Japan)%' and mc.note not like '%(USA)%' AND n1.name like '%Yo%' and n1.name not like '%Yu%' AND rt.role ='actress' AND an1.person_id = n1.id AND n1.id = ci.person_id AND ci.movie_id = t.id AND t.id = mc.movie_id AND mc.company_id = cn.id AND ci.role_id = rt.id AND an1.person_id = ci.person_id AND ci.movie_id = mc.movie_id; 2 | -------------------------------------------------------------------------------- /IMDB-JOB/8b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(an.name) AS acress_pseudonym, MIN(t.title) AS japanese_anime_movie FROM aka_name AS an, cast_info AS ci, company_name AS cn, movie_companies AS mc, name AS n, role_type AS rt, title AS t WHERE ci.note ='(voice: English version)' AND cn.country_code ='[jp]' AND mc.note like '%(Japan)%' and mc.note not like '%(USA)%' and (mc.note like '%(2006)%' or mc.note like '%(2007)%') AND n.name like '%Yo%' and n.name not like '%Yu%' AND rt.role ='actress' AND t.production_year between 2006 and 2007 and (t.title like 'One Piece%' or t.title like 'Dragon Ball Z%') 
AND an.person_id = n.id AND n.id = ci.person_id AND ci.movie_id = t.id AND t.id = mc.movie_id AND mc.company_id = cn.id AND ci.role_id = rt.id AND an.person_id = ci.person_id AND ci.movie_id = mc.movie_id; 2 | -------------------------------------------------------------------------------- /IMDB-JOB/8c.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(a1.name) AS writer_pseudo_name, MIN(t.title) AS movie_title FROM aka_name AS a1, cast_info AS ci, company_name AS cn, movie_companies AS mc, name AS n1, role_type AS rt, title AS t WHERE cn.country_code ='[us]' AND rt.role ='writer' AND a1.person_id = n1.id AND n1.id = ci.person_id AND ci.movie_id = t.id AND t.id = mc.movie_id AND mc.company_id = cn.id AND ci.role_id = rt.id AND a1.person_id = ci.person_id AND ci.movie_id = mc.movie_id; 2 | -------------------------------------------------------------------------------- /IMDB-JOB/8d.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(an1.name) AS costume_designer_pseudo, MIN(t.title) AS movie_with_costumes FROM aka_name AS an1, cast_info AS ci, company_name AS cn, movie_companies AS mc, name AS n1, role_type AS rt, title AS t WHERE cn.country_code ='[us]' AND rt.role ='costume designer' AND an1.person_id = n1.id AND n1.id = ci.person_id AND ci.movie_id = t.id AND t.id = mc.movie_id AND mc.company_id = cn.id AND ci.role_id = rt.id AND an1.person_id = ci.person_id AND ci.movie_id = mc.movie_id; 2 | -------------------------------------------------------------------------------- /IMDB-JOB/9a.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(an.name) AS alternative_name, MIN(chn.name) AS character_name, MIN(t.title) AS movie FROM aka_name AS an, char_name AS chn, cast_info AS ci, company_name AS cn, movie_companies AS mc, name AS n, role_type AS rt, title AS t WHERE ci.note in ('(voice)', '(voice: Japanese version)', 
'(voice) (uncredited)', '(voice: English version)') AND cn.country_code ='[us]' AND mc.note is not NULL and (mc.note like '%(USA)%' or mc.note like '%(worldwide)%') AND n.gender ='f' and n.name like '%Ang%' AND rt.role ='actress' AND t.production_year between 2005 and 2015 AND ci.movie_id = t.id AND t.id = mc.movie_id AND ci.movie_id = mc.movie_id AND mc.company_id = cn.id AND ci.role_id = rt.id AND n.id = ci.person_id AND chn.id = ci.person_role_id AND an.person_id = n.id AND an.person_id = ci.person_id; 2 | -------------------------------------------------------------------------------- /IMDB-JOB/9b.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(an.name) AS alternative_name, MIN(chn.name) AS voiced_character, MIN(n.name) AS voicing_actress, MIN(t.title) AS american_movie FROM aka_name AS an, char_name AS chn, cast_info AS ci, company_name AS cn, movie_companies AS mc, name AS n, role_type AS rt, title AS t WHERE ci.note = '(voice)' AND cn.country_code ='[us]' AND mc.note like '%(200%)%' and (mc.note like '%(USA)%' or mc.note like '%(worldwide)%') AND n.gender ='f' and n.name like '%Angel%' AND rt.role ='actress' AND t.production_year between 2007 and 2010 AND ci.movie_id = t.id AND t.id = mc.movie_id AND ci.movie_id = mc.movie_id AND mc.company_id = cn.id AND ci.role_id = rt.id AND n.id = ci.person_id AND chn.id = ci.person_role_id AND an.person_id = n.id AND an.person_id = ci.person_id; 2 | -------------------------------------------------------------------------------- /IMDB-JOB/9c.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(an.name) AS alternative_name, MIN(chn.name) AS voiced_character_name, MIN(n.name) AS voicing_actress, MIN(t.title) AS american_movie FROM aka_name AS an, char_name AS chn, cast_info AS ci, company_name AS cn, movie_companies AS mc, name AS n, role_type AS rt, title AS t WHERE ci.note in ('(voice)', '(voice: Japanese version)', 
'(voice) (uncredited)', '(voice: English version)') AND cn.country_code ='[us]' AND n.gender ='f' and n.name like '%An%' AND rt.role ='actress' AND ci.movie_id = t.id AND t.id = mc.movie_id AND ci.movie_id = mc.movie_id AND mc.company_id = cn.id AND ci.role_id = rt.id AND n.id = ci.person_id AND chn.id = ci.person_role_id AND an.person_id = n.id AND an.person_id = ci.person_id; 2 | -------------------------------------------------------------------------------- /IMDB-JOB/9d.sql: -------------------------------------------------------------------------------- 1 | SELECT MIN(an.name) AS alternative_name, MIN(chn.name) AS voiced_char_name, MIN(n.name) AS voicing_actress, MIN(t.title) AS american_movie FROM aka_name AS an, char_name AS chn, cast_info AS ci, company_name AS cn, movie_companies AS mc, name AS n, role_type AS rt, title AS t WHERE ci.note in ('(voice)', '(voice: Japanese version)', '(voice) (uncredited)', '(voice: English version)') AND cn.country_code ='[us]' AND n.gender ='f' AND rt.role ='actress' AND ci.movie_id = t.id AND t.id = mc.movie_id AND ci.movie_id = mc.movie_id AND mc.company_id = cn.id AND ci.role_id = rt.id AND n.id = ci.person_id AND chn.id = ci.person_role_id AND an.person_id = n.id AND an.person_id = ci.person_id; 2 | -------------------------------------------------------------------------------- /IMDB-JOB/temp.sql: -------------------------------------------------------------------------------- 1 | SELECT COUNT(*) FROM A, B, C, D, E, F WHERE 2 | A.id = B.Aid AND A.id = E.Aid AND 3 | A.id2 = B.Aid2 AND A.id2 = C.Aid2 AND 4 | C.id = D.Cid AND 5 | E.id = D.Eid AND E.id = F.Eid AND D.Eid = F.Eid 6 | AND Q(A) AND Q(B) AND Q(C) AND Q(D) AND Q(E) 7 | AND Q(F); 8 | 9 | -------------------------------------------------------------------------------- /Join_scheme/.DS_Store: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/Join_scheme/.DS_Store -------------------------------------------------------------------------------- /Join_scheme/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/Join_scheme/__init__.py -------------------------------------------------------------------------------- /Join_scheme/factor.py: -------------------------------------------------------------------------------- 1 | class Factor: 2 | """ 3 | This the class defines a multidimensional conditional probability. 4 | """ 5 | def __init__(self, table=None, table_len=None, variables=None, pdfs=None, equivalent_variables=None, na_values=None): 6 | self.table = table 7 | self.table_len = table_len 8 | self.variables = variables 9 | self.equivalent_variables = equivalent_variables 10 | self.pdfs = pdfs 11 | self.cardinalities = dict() 12 | for i, var in enumerate(self.variables): 13 | if type(pdfs) == dict: 14 | self.cardinalities[var] = len(pdfs[var]) 15 | if equivalent_variables and len(equivalent_variables) != 0: 16 | self.cardinalities[equivalent_variables[i]] = pdfs[var] 17 | else: 18 | self.cardinalities[var] = pdfs.shape[i] 19 | if equivalent_variables and len(equivalent_variables) != 0: 20 | self.cardinalities[equivalent_variables[i]] = pdfs.shape[i] 21 | self.na_values = na_values # the percentage of data, which is not nan, so the variable name is misleading. 22 | 23 | 24 | class Group_Factor: 25 | """ 26 | This the class defines a multidimensional conditional probability on a group of tables. 
import numpy as np
from Schemas.imdb.schema import gen_imdb_schema
from Join_scheme.binning import identify_key_values
from Join_scheme.join_graph import parse_query_all_join


def count_join_key_appearance(queries, equivalent_keys):
    """
    Analyze the workload and count how many times each join key group appears.

    :param queries: iterable of query strings, each handed to `parse_query_all_join`
    :param equivalent_keys: mapping from a group's representative key to the collection
                            of join keys belonging to that group
    :return: (dict mapping representative key -> number of appearances,
              total number of appearances over all groups)
    """
    all_join_keys_stats = dict()
    total_num_appearance = 0
    for q in queries:
        # The last element of the parse result is indexed by table and yields that
        # table's join keys.
        join_keys_per_table = parse_query_all_join(q)[-1]
        for table in join_keys_per_table:
            for join_key in join_keys_per_table[table]:
                for PK in equivalent_keys:
                    if join_key in equivalent_keys[PK]:
                        total_num_appearance += 1
                        all_join_keys_stats[PK] = all_join_keys_stats.get(PK, 0) + 1
                        # A join key is credited to the first group that contains it.
                        break
    return all_join_keys_stats, total_num_appearance


def get_n_bins_from_query(bin_size, data_path, query_file):
    """
    Derive the number of bins to use for each join key group.

    Without a workload every group gets `bin_size` bins. With a workload, the overall
    budget `bin_size * number_of_groups` is split proportionally to how often each
    group appears in the queries.

    :param bin_size: average number of bins to use for each join key group
    :param data_path: passed to `gen_imdb_schema` to build the schema
    :param query_file: path of a text file with one query per line, or None
    :return: dict mapping each join key group to its number of bins. Groups that never
             appear in the workload get a share of 0 instead of raising KeyError.
             If the workload contains no known join key at all, every group falls
             back to `bin_size`.
    """
    schema = gen_imdb_schema(data_path)
    _, equivalent_keys = identify_key_values(schema)

    if query_file is None:
        return {key: bin_size for key in equivalent_keys}

    with open(query_file, "r") as f:
        # Skip blank lines so they are not handed to the query parser.
        queries = [line for line in f if line.strip()]
    all_join_keys_stats, total_num_appearance = count_join_key_appearance(queries, equivalent_keys)

    if total_num_appearance == 0:
        # Nothing to weight by: avoid dividing by zero and use the uniform allocation.
        return {key: bin_size for key in equivalent_keys}

    total_bins = bin_size * len(equivalent_keys)
    return {
        key: total_bins * (all_join_keys_stats.get(key, 0) / total_num_appearance)
        for key in equivalent_keys
    }
https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/Pgmpy/base/__pycache__/DAG.cpython-37.pyc -------------------------------------------------------------------------------- /Pgmpy/base/__pycache__/UndirectedGraph.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/Pgmpy/base/__pycache__/UndirectedGraph.cpython-37.pyc -------------------------------------------------------------------------------- /Pgmpy/base/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/Pgmpy/base/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /Pgmpy/data/__init__.py: -------------------------------------------------------------------------------- 1 | from .Data import Data 2 | 3 | __all__ = ["Data"] 4 | -------------------------------------------------------------------------------- /Pgmpy/data/__pycache__/Data.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/Pgmpy/data/__pycache__/Data.cpython-37.pyc -------------------------------------------------------------------------------- /Pgmpy/data/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/Pgmpy/data/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /Pgmpy/estimators/MLE.py: -------------------------------------------------------------------------------- 1 | # 
from itertools import chain

import numpy as np

from Pgmpy.estimators import ParameterEstimator
from Pgmpy.factors.discrete import TabularCPD
from Pgmpy.models import BayesianModel


class MaximumLikelihoodEstimator(ParameterEstimator):
    def __init__(self, model, data, **kwargs):
        """
        Class used to compute parameters for a model using Maximum Likelihood Estimation.

        Parameters
        ----------
        model: A pgmpy.models.BayesianModel instance

        data: pandas DataFrame object
            DataFrame object with column names identical to the variable names of the network.
            (If some values in the data are missing the data cells should be set to `numpy.NaN`.
            Note that pandas converts each column containing `numpy.NaN`s to dtype `float`.)

        state_names: dict (optional)
            A dict indicating, for each variable, the discrete set of states
            that the variable can take. If unspecified, the observed values
            in the data set are taken to be the only possible states.

        complete_samples_only: bool (optional, default `True`)
            Specifies how to deal with missing data, if present. If set to `True` all rows
            that contain `np.NaN` somewhere are ignored. If `False` then, for each variable,
            every row where neither the variable nor its parents are `np.NaN` is used.

        Raises
        ------
        NotImplementedError
            If `model` is not a `BayesianModel`.
        """
        if not isinstance(model, BayesianModel):
            raise NotImplementedError(
                "Maximum Likelihood Estimate is only implemented for BayesianModel"
            )

        super().__init__(model, data, **kwargs)

    def get_parameters(self):
        """
        Method to estimate the model parameters (CPDs) using Maximum Likelihood Estimation.

        Returns
        -------
        parameters: list
            List of CPDs, one for each variable of the model, in sorted node order.
        """
        return [self.estimate_cpd(node) for node in sorted(self.model.nodes())]

    def estimate_cpd(self, node):
        """
        Method to estimate the CPD for a given variable.

        Parameters
        ----------
        node: int, string (any hashable python object)
            The name of the variable for which the CPD is to be estimated.

        Returns
        -------
        CPD: TabularCPD
            Built from the observed state counts and normalized before being returned.
        """
        state_counts = self.state_counts(node)

        # if a column contains only `0`s (no states observed for some configuration
        # of parents' states) fill that column uniformly instead
        state_counts.loc[:, (state_counts == 0).all()] = 1

        parents = sorted(self.model.get_parents(node))
        parents_cardinalities = [len(self.state_names[parent]) for parent in parents]
        node_cardinality = len(self.state_names[node])

        # The CPD's state names come from the estimator's own `state_names`, restricted
        # to the node and its parents.
        cpd = TabularCPD(
            node,
            node_cardinality,
            np.array(state_counts),
            evidence=parents,
            evidence_card=parents_cardinalities,
            state_names={var: self.state_names[var] for var in chain([node], parents)},
        )
        cpd.normalize()
        return cpd
Pgmpy.estimators.BayesianEstimator import BayesianEstimator 4 | 5 | __all__ = [ 6 | "BaseEstimator", 7 | "ParameterEstimator", 8 | "MaximumLikelihoodEstimator", 9 | "BayesianEstimator", 10 | "StructureEstimator", 11 | ] 12 | -------------------------------------------------------------------------------- /Pgmpy/estimators/__pycache__/BayesianEstimator.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/Pgmpy/estimators/__pycache__/BayesianEstimator.cpython-37.pyc -------------------------------------------------------------------------------- /Pgmpy/estimators/__pycache__/CITests.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/Pgmpy/estimators/__pycache__/CITests.cpython-37.pyc -------------------------------------------------------------------------------- /Pgmpy/estimators/__pycache__/MLE.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/Pgmpy/estimators/__pycache__/MLE.cpython-37.pyc -------------------------------------------------------------------------------- /Pgmpy/estimators/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/Pgmpy/estimators/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /Pgmpy/estimators/__pycache__/base.cpython-37.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/Pgmpy/estimators/__pycache__/base.cpython-37.pyc -------------------------------------------------------------------------------- /Pgmpy/extern/__init__.py: -------------------------------------------------------------------------------- 1 | from .tabulate import tabulate 2 | 3 | __all__ = ["tabulate"] 4 | -------------------------------------------------------------------------------- /Pgmpy/extern/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/Pgmpy/extern/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /Pgmpy/extern/__pycache__/tabulate.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/Pgmpy/extern/__pycache__/tabulate.cpython-37.pyc -------------------------------------------------------------------------------- /Pgmpy/factors/__init__.py: -------------------------------------------------------------------------------- 1 | from .FactorSet import FactorSet, factorset_product, factorset_divide 2 | from .base import factor_product, factor_divide 3 | 4 | __all__ = [ 5 | "FactorSet", 6 | "factorset_divide", 7 | "factorset_product", 8 | "factor_product", 9 | "factor_divide", 10 | ] 11 | -------------------------------------------------------------------------------- /Pgmpy/factors/__pycache__/FactorSet.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/Pgmpy/factors/__pycache__/FactorSet.cpython-37.pyc 
from abc import abstractmethod
from functools import reduce


class BaseFactor(object):
    """
    Base class for Factors. Any Factor implementation should inherit this class.

    The class does not use `ABCMeta`, so the `@abstractmethod` marker on
    `is_valid_cpd` is documentation only: subclasses that do not override it can
    still be instantiated.
    """

    def __init__(self, *args, **kwargs):
        pass

    @abstractmethod
    def is_valid_cpd(self):
        pass


def factor_product(*args):
    """
    Returns factor product over `args`.

    Parameters
    ----------
    args: `BaseFactor` instances.
        factors to be multiplied

    Returns
    -------
    BaseFactor: the result of folding `*` over `args` from left to right. With a single
        argument that argument itself is returned (no copy is made).

    Raises
    ------
    TypeError
        If any argument is not a `BaseFactor`.
    NotImplementedError
        If the arguments are not all instances of exactly the same class. This is also
        what an empty call raises, because zero distinct classes is not one class.
    """
    if not all(isinstance(phi, BaseFactor) for phi in args):
        raise TypeError("Arguments must be factors")

    # Check if all of the arguments are of the same type
    if len({type(phi) for phi in args}) != 1:
        raise NotImplementedError(
            "All the args are expected to be instances of the same factor class."
        )

    return reduce(lambda phi1, phi2: phi1 * phi2, args)


def factor_divide(phi1, phi2):
    """
    Returns the factor representing `phi1 / phi2`.

    Parameters
    ----------
    phi1: Factor
        The Dividend.

    phi2: Factor
        The Divisor.

    Returns
    -------
    Whatever `phi1.divide(phi2, inplace=False)` returns; `phi1` is asked not to modify
    itself.

    Raises
    ------
    TypeError
        If either argument is not a `BaseFactor`.
    NotImplementedError
        If the two arguments are not instances of exactly the same class.
    """
    if not isinstance(phi1, BaseFactor) or not isinstance(phi2, BaseFactor):
        raise TypeError("phi1 and phi2 should be factors instances")

    # Check if all of the arguments are of the same type
    if type(phi1) is not type(phi2):
        raise NotImplementedError(
            "All the args are expected to be instances of the same factor class."
        )

    return phi1.divide(phi2, inplace=False)
/Pgmpy/factors/continuous/__pycache__/LinearGaussianCPD.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/Pgmpy/factors/continuous/__pycache__/LinearGaussianCPD.cpython-37.pyc -------------------------------------------------------------------------------- /Pgmpy/factors/continuous/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/Pgmpy/factors/continuous/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /Pgmpy/factors/continuous/__pycache__/discretize.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/Pgmpy/factors/continuous/__pycache__/discretize.cpython-37.pyc -------------------------------------------------------------------------------- /Pgmpy/factors/discrete/__init__.py: -------------------------------------------------------------------------------- 1 | from .DiscreteFactor import DiscreteFactor, State 2 | from .CPD import TabularCPD 3 | from .JointProbabilityDistribution import JointProbabilityDistribution 4 | 5 | __all__ = ["TabularCPD", "State", "DiscreteFactor"] 6 | -------------------------------------------------------------------------------- /Pgmpy/factors/discrete/__pycache__/CPD.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/Pgmpy/factors/discrete/__pycache__/CPD.cpython-37.pyc -------------------------------------------------------------------------------- 
/Pgmpy/factors/discrete/__pycache__/DiscreteFactor.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/Pgmpy/factors/discrete/__pycache__/DiscreteFactor.cpython-37.pyc -------------------------------------------------------------------------------- /Pgmpy/factors/discrete/__pycache__/JointProbabilityDistribution.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/Pgmpy/factors/discrete/__pycache__/JointProbabilityDistribution.cpython-37.pyc -------------------------------------------------------------------------------- /Pgmpy/factors/discrete/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/Pgmpy/factors/discrete/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /Pgmpy/factors/distributions/__init__.py: -------------------------------------------------------------------------------- 1 | from .base import BaseDistribution 2 | from .CustomDistribution import CustomDistribution 3 | from .GaussianDistribution import GaussianDistribution 4 | 5 | 6 | __all__ = ["BaseDistribution", "CustomDistribution", "GaussianDistribution"] 7 | -------------------------------------------------------------------------------- /Pgmpy/factors/distributions/__pycache__/CanonicalDistribution.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/Pgmpy/factors/distributions/__pycache__/CanonicalDistribution.cpython-37.pyc 
from abc import ABCMeta, abstractmethod, abstractproperty


class BaseDistribution(object):
    """
    Placeholder base class for distributions.

    The class defines no behaviour of its own. The interface below is kept for
    reference only: it is part of this docstring, so none of it is executed and
    nothing is enforced on subclasses.

    @abstractproperty
    def pdf(self):
        pass

    @abstractproperty
    def variables(self):
        pass

    @abstractmethod
    def assignment(self, *args, **kwargs):
        pass

    @abstractmethod
    def copy(self):
        pass
    @abstractmethod
    def discretize(self, method, *args, **kwargs):
        pass

    @abstractmethod
    def reduce(self, values, inplace=True):
        pass

    @abstractmethod
    def marginalize(self, variables, inplace=True):
        pass

    @abstractmethod
    def normalize(self, inplace=True):
        pass

    @abstractmethod
    def product(self, other, inplace=True):
        pass

    @abstractmethod
    def divide(self, other, inplace=True):
        pass
    """

    pass
# TODO: The variables set in this file should move to setup.py

# Optional dependency: torch. When it imports, pick the compute device (GPU if one is
# available, CPU otherwise) and the default tensor dtype; when it does not, all three
# names are bound to None so importers can test for them.
try:  # pragma: no cover
    import torch

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    dtype = torch.float
except ImportError:  # pragma: no cover
    torch = device = dtype = None


# Optional dependency: pandas. HAS_PANDAS records whether the import succeeded.
try:  # pragma: no cover
    import pandas
except ImportError:  # pragma: no cover
    pandas = None

HAS_PANDAS = pandas is not None
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/Pgmpy/independencies/__pycache__/Independencies.cpython-37.pyc -------------------------------------------------------------------------------- /Pgmpy/independencies/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/Pgmpy/independencies/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /Pgmpy/inference/__init__.py: -------------------------------------------------------------------------------- 1 | from .base import Inference 2 | from .ExactInference import VariableEliminationJIT 3 | from .ExactInferenceTorch import VariableEliminationJIT_torch 4 | 5 | __all__ = [ 6 | "Inference", 7 | "VariableEliminationJIT", 8 | "VariableEliminationJIT_torch", 9 | ] 10 | -------------------------------------------------------------------------------- /Pgmpy/inference/__pycache__/EliminationOrder.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/Pgmpy/inference/__pycache__/EliminationOrder.cpython-37.pyc -------------------------------------------------------------------------------- /Pgmpy/inference/__pycache__/ExactInference.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/Pgmpy/inference/__pycache__/ExactInference.cpython-37.pyc -------------------------------------------------------------------------------- /Pgmpy/inference/__pycache__/ExactInferenceTorch.cpython-37.pyc: 
#!/usr/bin/env python3

from collections import defaultdict
from itertools import chain

from Pgmpy.models import BayesianModel
from Pgmpy.models import JunctionTree
from Pgmpy.factors.discrete import TabularCPD


class Inference(object):
    """
    Base class for all inference algorithms.

    Validates the model with `model.check_model()` and collects, per variable, the
    factors that mention it so that inference algorithms can work on a uniform
    representation.

    Parameters
    ----------
    model: the model for which to initialize the inference object.
        Only `JunctionTree` (variables are the union of its clique nodes) and
        `BayesianModel` (factors, cardinalities and state names are filled in from its
        CPDs) receive special handling in this constructor; for any other model only
        `model`, `variables`, and empty `cardinality` / `factors` containers are set.
    """

    def __init__(self, model):
        self.model = model
        model.check_model()

        # A junction tree's nodes are cliques (collections of variables); flatten them.
        is_junction_tree = isinstance(model, JunctionTree)
        self.variables = set(chain(*model.nodes())) if is_junction_tree else model.nodes()

        self.cardinality = {}
        self.factors = defaultdict(list)

        if not isinstance(model, BayesianModel):
            return

        self.state_names_map = {}
        for node in model.nodes():
            node_cpd = model.get_cpds(node)
            if isinstance(node_cpd, TabularCPD):
                self.cardinality[node] = node_cpd.variable_card
                node_cpd = node_cpd.to_factor()
            # Register the factor under every variable in its scope.
            for scope_var in node_cpd.scope():
                self.factors[scope_var].append(node_cpd)
            self.state_names_map.update(node_cpd.no_to_name)
-------------------------------------------------------------------------------- /Pgmpy/models/__pycache__/JunctionTree.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/Pgmpy/models/__pycache__/JunctionTree.cpython-37.pyc -------------------------------------------------------------------------------- /Pgmpy/models/__pycache__/LinearGaussianBayesianNetwork.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/Pgmpy/models/__pycache__/LinearGaussianBayesianNetwork.cpython-37.pyc -------------------------------------------------------------------------------- /Pgmpy/models/__pycache__/MarkovModel.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/Pgmpy/models/__pycache__/MarkovModel.cpython-37.pyc -------------------------------------------------------------------------------- /Pgmpy/models/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/Pgmpy/models/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /Pgmpy/readwrite/__init__.py: -------------------------------------------------------------------------------- 1 | from .XMLBIF import XMLBIFReader, XMLBIFWriter 2 | from .PomdpX import PomdpXReader, PomdpXWriter 3 | from .XMLBeliefNetwork import XBNReader, XBNWriter 4 | from .UAI import UAIReader, UAIWriter 5 | from .BIF import BIFReader, BIFWriter 6 | 7 | __all__ = [ 8 | "XMLBIFReader", 9 | "XMLBIFWriter", 10 | "XBNReader", 11 | 
"XBNWriter", 12 | "PomdpXReader", 13 | "PomdpXWriter", 14 | "UAIReader", 15 | "UAIWriter", 16 | "BIFReader", 17 | "BIFWriter", 18 | ] 19 | -------------------------------------------------------------------------------- /Pgmpy/readwrite/__pycache__/BIF.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/Pgmpy/readwrite/__pycache__/BIF.cpython-37.pyc -------------------------------------------------------------------------------- /Pgmpy/readwrite/__pycache__/PomdpX.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/Pgmpy/readwrite/__pycache__/PomdpX.cpython-37.pyc -------------------------------------------------------------------------------- /Pgmpy/readwrite/__pycache__/UAI.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/Pgmpy/readwrite/__pycache__/UAI.cpython-37.pyc -------------------------------------------------------------------------------- /Pgmpy/readwrite/__pycache__/XMLBIF.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/Pgmpy/readwrite/__pycache__/XMLBIF.cpython-37.pyc -------------------------------------------------------------------------------- /Pgmpy/readwrite/__pycache__/XMLBeliefNetwork.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/Pgmpy/readwrite/__pycache__/XMLBeliefNetwork.cpython-37.pyc 
-------------------------------------------------------------------------------- /Pgmpy/readwrite/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/Pgmpy/readwrite/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /Pgmpy/sampling/__init__.py: -------------------------------------------------------------------------------- 1 | from .base import ( 2 | BaseGradLogPDF, 3 | GradLogPDFGaussian, 4 | LeapFrog, 5 | ModifiedEuler, 6 | BaseSimulateHamiltonianDynamics, 7 | _return_samples, 8 | ) 9 | from .HMC import HamiltonianMC, HamiltonianMCDA 10 | from .NUTS import NoUTurnSampler, NoUTurnSamplerDA 11 | from .Sampling import BayesianModelSampling 12 | 13 | __all__ = [ 14 | "LeapFrog", 15 | "ModifiedEuler", 16 | "BaseSimulateHamiltonianDynamics", 17 | "BaseGradLogPDF", 18 | "GradLogPDFGaussian", 19 | "_return_samples", 20 | "HamiltonianMC", 21 | "HamiltonianMCDA", 22 | "NoUTurnSampler", 23 | "NoUTurnSamplerDA", 24 | "BayesianModelSampling", 25 | "GibbsSampling", 26 | ] 27 | -------------------------------------------------------------------------------- /Pgmpy/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .mathext import cartesian, sample_discrete 2 | from .state_name import StateNameMixin 3 | from .check_functions import _check_1d_array_object, _check_length_equal 4 | from .optimizer import optimize, pinverse 5 | 6 | 7 | __all__ = [ 8 | "cartesian", 9 | "sample_discrete", 10 | "StateNameMixin", 11 | "_check_1d_array_object", 12 | "_check_length_equal", 13 | "optimize", 14 | "pinverse", 15 | ] 16 | -------------------------------------------------------------------------------- /Pgmpy/utils/__pycache__/__init__.cpython-37.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/Pgmpy/utils/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /Pgmpy/utils/__pycache__/check_functions.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/Pgmpy/utils/__pycache__/check_functions.cpython-37.pyc -------------------------------------------------------------------------------- /Pgmpy/utils/__pycache__/decorators.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/Pgmpy/utils/__pycache__/decorators.cpython-37.pyc -------------------------------------------------------------------------------- /Pgmpy/utils/__pycache__/mathext.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/Pgmpy/utils/__pycache__/mathext.cpython-37.pyc -------------------------------------------------------------------------------- /Pgmpy/utils/__pycache__/optimizer.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/Pgmpy/utils/__pycache__/optimizer.cpython-37.pyc -------------------------------------------------------------------------------- /Pgmpy/utils/__pycache__/state_name.cpython-37.pyc: -------------------------------------------------------------------------------- 
def convert_args_tuple(func):
    """
    Decorator that converts the ``parents`` argument to a tuple before calling ``func``.

    The returned wrapper accepts ``(obj, variable, parents=(), complete_samples_only=None)``
    and forwards all four values positionally to ``func``. Turning ``parents`` into a
    tuple makes the argument hashable regardless of the iterable the caller passed.

    Parameters
    ----------
    func: callable
        Called as ``func(obj, variable, parents, complete_samples_only)``.

    Returns
    -------
    callable: the wrapper; it carries ``func``'s metadata (``__name__``, ``__doc__``,
        ``__wrapped__``) so introspection and debugging show the decorated function.
    """
    # Function-scope import: the decorator module has no top-level imports of its own.
    from functools import wraps

    @wraps(func)
    def _convert_param_to_tuples(
        obj, variable, parents=tuple(), complete_samples_only=None
    ):
        parents = tuple(parents)
        return func(obj, variable, parents, complete_samples_only)

    return _convert_param_to_tuples
def cartesian(arrays, out=None):
    """Generate a cartesian product of input arrays.

    Parameters
    ----------
    arrays : list of array-like
        1-D arrays to form the cartesian product of.
    out : ndarray
        Array to place the cartesian product in. When given, its dtype is used
        as-is and values are cast on assignment.

    Returns
    -------
    out : ndarray
        2-D array of shape (M, len(arrays)) containing cartesian products
        formed of input arrays. When `out` is not supplied the dtype is the
        common (promoted) dtype of all inputs, so mixing e.g. ints and floats
        no longer truncates the floats to the dtype of the first array.

    Examples
    --------
    >>> cartesian(([1, 2, 3], [4, 5], [6, 7]))
    array([[1, 4, 6],
           [1, 4, 7],
           [1, 5, 6],
           [1, 5, 7],
           [2, 4, 6],
           [2, 4, 7],
           [2, 5, 6],
           [2, 5, 7],
           [3, 4, 6],
           [3, 4, 7],
           [3, 5, 6],
           [3, 5, 7]])

    """
    arrays = [np.asarray(x) for x in arrays]
    shape = tuple(len(x) for x in arrays)

    # Row i of `ix` holds, per input array, the index of the element used in
    # the i-th combination.
    ix = np.indices(shape)
    ix = ix.reshape(len(arrays), -1).T

    if out is None:
        try:
            dtype = np.result_type(*arrays)
        except TypeError:
            # Inputs without a common dtype: keep the historical behaviour of
            # casting everything to the first array's dtype.
            dtype = arrays[0].dtype
        out = np.empty_like(ix, dtype=dtype)

    for n, arr in enumerate(arrays):
        out[:, n] = arr[ix[:, n]]

    return out
def optimize(
    loss_fn, params=None, loss_args=None, opt="adam", max_iter=10000, exit_delta=1e-4
):
    """
    Generic function to optimize loss functions.

    Parameters
    ----------
    loss_fn: Function
        The function to optimize. It is called as ``loss_fn(params, loss_args)``
        and must return a torch.Tensor object.

    params: dict {str: torch.Tensor}
        The parameters which need to be optimized along with their initial values. The
        dictionary should be of the form: {variable name: initial value}.
        ``None`` (the default) is treated as an empty dict.

    loss_args: dict {str: torch.Tensor}
        Extra parameters which loss function needs to compute the loss.
        ``None`` (the default) is treated as an empty dict.

    opt: str | Instance of torch.optim.Optimizer
        The optimizer to use. Should either be an instance of torch.optim or a str.
        When str is given initializes the optimizer with default parameters
        over ``params.values()``. The name is matched case-insensitively.

        If str the options are:
            1. Adadelta: Adadelta algorithm (Ref: https://arxiv.org/abs/1212.5701)
            2. Adagrad: Adagrad algorithm (Ref: http://jmlr.org/papers/v12/duchi11a.html)
            3. Adam: Adam algorithm (Ref: https://arxiv.org/abs/1412.6980)
            4. SparseAdam: Lazy version of Adam. Suitable for sparse tensors.
            5. Adamax: Adamax algorithm (variant of Adam based on infinity norm)
            6. ASGD: Averaged Stochastic Gradient Descent (Ref: https://dl.acm.org/citation.cfm?id=131098)
            7. LBFGS: L-BFGS Algorithm
            8. RMSprop: RMSprop Algorithm (Ref: https://arxiv.org/abs/1308.0850v5)
            9. Rprop: Resilient Backpropagation Algorithm
            10. SGD: Stochastic Gradient Descent.

    max_iter: int (default: 10000)
        The maximum number of iterations to run the optimization for.

    exit_delta: float
        The optimization exit criteria. When change in loss in an iteration is less than
        `exit_delta` the optimizer returns the values.

    Returns
    -------
    dict: The values that were given in params in the same format (the same
        dict object when one was passed in).

    Raises
    ------
    ImportError: if `opt` is a str and torch is not installed.
    KeyError: if `opt` is a str that is not one of the names listed above.
    """
    # TODO: Add option to modify the optimizers.
    # Fresh containers per call instead of shared mutable default arguments.
    if params is None:
        params = {}
    if loss_args is None:
        loss_args = {}

    if isinstance(opt, str):
        # Imported here so the failure is an explicit ImportError rather than
        # an attribute error on a missing module-level optimizer namespace.
        try:
            import torch
        except ImportError as err:
            raise ImportError(
                "torch is required to build an optimizer from its name"
            ) from err

        optim = torch.optim
        opt_dict = {
            "adadelta": optim.Adadelta,
            "adagrad": optim.Adagrad,
            "adam": optim.Adam,
            "sparseadam": optim.SparseAdam,
            "adamax": optim.Adamax,
            "asgd": optim.ASGD,
            "lbfgs": optim.LBFGS,
            "rmsprop": optim.RMSprop,
            "rprop": optim.Rprop,
            "sgd": optim.SGD,
        }
        opt = opt_dict[opt.lower()](params.values())

    def closure():
        # Re-evaluates the loss and its gradients; optimizers such as LBFGS
        # call this several times within a single step.
        opt.zero_grad()
        loss = loss_fn(params, loss_args)
        loss.backward()
        return loss

    previous_loss = float("inf")
    for t in range(max_iter):
        opt.step(closure=closure)

        # Evaluate the post-step loss once and reuse it for both the
        # convergence check and the next iteration's reference value.
        current_loss = closure().item()
        if isclose(previous_loss, current_loss, abs_tol=exit_delta):
            warnings.warn(
                "Converged after {iterations} iterations.".format(iterations=t)
            )
            return params
        previous_loss = current_loss

    warnings.warn(
        "Couldn't converge after {iterations} iterations. Try increasing max_iter "
        "or change optimizer parameters".format(iterations=max_iter)
    )
    return params
class StateNameMixin:
    """
    This class is inherited by classes which deal with state names of variables.
    The state names are stored in instances of `StateNameMixin`. The conversion between
    state number and names are also handled by methods in this class.

    After `store_state_names` three attributes exist on the instance:

    * ``state_names``: {variable: list of state names}
    * ``name_to_no``:  {variable: {state name: state number}}
    * ``no_to_name``:  {variable: {state number: state name}}
    """

    def store_state_names(self, variables, cardinality, state_names):
        """
        Initialize an instance of StateNameMixin.

        Parameters
        ----------
        variables: sequence
            Variable names; only used when `state_names` is empty/None.
        cardinality: sequence
            ``cardinality[i]`` is the number of states of ``variables[i]``; only
            used when `state_names` is empty/None.
        state_names: dict or None
            {variable: list or tuple of unique state names}. When empty or None,
            states are named ``0 .. cardinality - 1``.

        Raises
        ------
        ValueError: if a value of `state_names` is not a list/tuple or contains
            repeated names.
        """
        if state_names:
            for key, value in state_names.items():
                if not isinstance(value, (list, tuple)):
                    raise ValueError(
                        "The state names must be for the form: {variable: list_of_states}"
                    )
                elif not len(set(value)) == len(value):
                    raise ValueError(
                        "Repeated statenames for variable: {var}".format(var=key)
                    )

            # Make a copy, so that the original object doesn't get modified after operations.
            self.state_names = copy.deepcopy(state_names)
            # Create maps for easy access to specific state names of state numbers.
            self.name_to_no = {}
            self.no_to_name = {}
            for key, values in self.state_names.items():
                self.name_to_no[key] = {name: no for no, name in enumerate(values)}
                self.no_to_name[key] = dict(enumerate(values))
        else:
            self.state_names = {
                var: list(range(int(cardinality[index])))
                for index, var in enumerate(variables)
            }
            self.name_to_no = {
                var: {i: i for i in range(int(cardinality[index]))}
                for index, var in enumerate(variables)
            }
            # Build independent inner dicts: a shallow copy of the outer dict
            # would make both maps share (and co-mutate) the same per-variable dicts.
            self.no_to_name = {
                var: dict(mapping) for var, mapping in self.name_to_no.items()
            }

    def get_state_names(self, var, state_no):
        """
        Given `var` and `state_no` returns the state name.
        """
        if self.state_names:
            return self.no_to_name[var][state_no]
        else:
            return state_no

    def get_state_no(self, var, state_name):
        """
        Given `var` and `state_name` return the state number.
        """
        if self.state_names:
            return self.name_to_no[var][state_name]
        else:
            return state_name

    def add_state_names(self, phi1):
        """
        Updates the attributes of this class with another factor `phi1`.

        Entries of `phi1` overwrite entries of this instance for variables
        present in both.

        Parameters
        ----------
        phi1: Instance of pgmpy.factors.DiscreteFactor
            The factor whose states and variables need to be added.
        """
        self.state_names.update(phi1.state_names)
        self.name_to_no.update(phi1.name_to_no)
        self.no_to_name.update(phi1.no_to_name)

    def del_state_names(self, var_list):
        """
        Deletes the state names for variables in var_list

        Raises KeyError if a variable in `var_list` is not stored.
        """
        for var in var_list:
            del self.state_names[var]
            del self.name_to_no[var]
            del self.no_to_name[var]
for key in equivalent_keys[bkey]: 31 | # create the sampled table, build all the tables on primary keys first 32 | table = key[0:key.find(".")] 33 | new_table = NEW_TABLE_TEMPLATE.format(TABLE=table, 34 | SS=sampling_type, 35 | PERCENTAGE=str(sampling_percentage)) 36 | count_sql = "SELECT COUNT(*) FROM {}".format(table) 37 | cursor.execute(count_sql) 38 | output = cursor.fetchall()[0][0] 39 | if output < 1000: 40 | cur_sampling_frac = 1.0 41 | else: 42 | cur_sampling_frac = sampling_frac 43 | 44 | new_table = new_table.replace(".", "d") 45 | print(new_table) 46 | 47 | sel_sql = "SELECT * FROM {} WHERE random() < {}".format( \ 48 | table, str(cur_sampling_frac)) 49 | create_sql = CREATE_TEMPLATE.format(TABLE_NAME=new_table, 50 | SEL_SQL=sel_sql) 51 | print(create_sql) 52 | 53 | cursor.execute(create_sql) 54 | con.commit() 55 | 56 | # lets create a new column for this table 57 | orig_col = key[key.find(".") + 1:] 58 | newcolname = COL_TEMPLATE.format(COL=orig_col) 59 | 60 | drop_col_sql = DROP_COL_TEMPLATE.format(TAB=new_table, 61 | COL=newcolname) 62 | print(drop_col_sql) 63 | cursor.execute(drop_col_sql) 64 | con.commit() 65 | 66 | create_col_sql = CREATE_COL_TMP.format(TABLE=new_table, 67 | COL=newcolname) 68 | print(create_col_sql) 69 | 70 | cursor.execute(create_col_sql) 71 | con.commit() 72 | 73 | newkey = key[key.find(".") + 1:] 74 | for bi, vals in enumerate(binvals): 75 | curbinvals = ','.join(vals) 76 | # UPDATE_TMP = "update {TABLE} set {COL}={VAL} where {KEY} IN ({BINVALS});" 77 | updatesql = UPDATE_TMP.format(TABLE=new_table, 78 | COL=newcolname, 79 | VAL=bi, 80 | KEY=newkey, 81 | BINVALS=curbinvals) 82 | # print(updatesql) 83 | print("updating bin: ", bi) 84 | cursor.execute(updatesql) 85 | con.commit() 86 | -------------------------------------------------------------------------------- /Sampling/load_sample.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | import numpy as np 3 | import os 4 | 
def load_sample_imdb_one_query(table_buckets, tables_alias, query_file_name, join_keys, table_key_equivalent_group,
                               SPERCENTAGE=1.0, qdir="/home/ubuntu/data_CE/saved_models/binned_cards/{}/job/all_job/"):
    """
    Build one factor per table alias for a single query from a pickled sample file.

    The file ``query_file_name`` inside ``qdir.format(SPERCENTAGE)`` is unpickled and
    read through its ``"all_aliases"``, ``"results"`` and ``"all_columns"`` entries.
    NOTE(review): `pickle.load` executes arbitrary code from the file; only use
    trusted sample files.

    Parameters
    ----------
    table_buckets: mapping table name -> object whose ``bin_sizes[key]`` gives the
        number of bins of join key ``"<table>.<column>"``.
    tables_alias: mapping alias -> table name; its aliases define the output keys.
    query_file_name: file name of the pickled sample result.
    join_keys: not used by this function.
    table_key_equivalent_group: mapping table name -> factor-like object exposing
        ``pdfs``, ``table_len`` and ``na_values``; used as the fallback when no
        sampled distribution is available.
    SPERCENTAGE: sampling percentage; substituted into `qdir` and used to scale
        the sampled row count up by ``1 / (SPERCENTAGE / 100)``.
    qdir: directory template of the sample files.

    Returns
    -------
    dict: alias -> `Factor` built from the sample, or the unfiltered entry of
        `table_key_equivalent_group` for aliases without any sampled result.
    """
    sample_path = os.path.join(qdir.format(SPERCENTAGE), query_file_name)
    with open(sample_path, "rb") as handle:
        data = pickle.load(handle)

    table_pdfs = {}
    filter_size = {}
    for idx, alias_entry in enumerate(data["all_aliases"]):
        cards = data["results"][idx][0]
        if cards is None:
            # nothing recorded for this (alias, column) pair
            continue

        column = data["all_columns"][idx]
        alias = alias_entry[0]
        table = tables_alias[alias]
        key = table + "." + column

        # accumulate the per-bin counts; a missing bin id is counted in bin 0
        pdfs = np.zeros(table_buckets[table].bin_sizes[key])
        for bin_id, count in cards:
            pdfs[0 if bin_id is None else bin_id] += count

        table_len = np.sum(pdfs)
        if table_len == 0:
            # no sample satisfies the filter: use a small size and the
            # unfiltered distribution of this key
            table_len = 1
            pdfs = table_key_equivalent_group[table].pdfs[key]
        else:
            pdfs /= table_len

        if alias not in table_pdfs:
            # the size is recorded from the first key seen for the alias
            table_pdfs[alias] = dict()
            filter_size[alias] = table_len
        table_pdfs[alias][key] = pdfs

    conditional_factors = {}
    for alias in tables_alias:
        unfiltered = table_key_equivalent_group[tables_alias[alias]]
        if alias not in table_pdfs:
            # ground-truth distribution
            conditional_factors[alias] = unfiltered
            continue

        # scale the sampled size up, capped at the full table length
        table_len = min(unfiltered.table_len, filter_size[alias] / (SPERCENTAGE / 100))
        conditional_factors[alias] = Factor(tables_alias[alias], table_len, list(table_pdfs[alias].keys()),
                                            table_pdfs[alias], na_values=unfiltered.na_values)
    return conditional_factors
-------------------------------------------------------------------------------- 1 | import numpy as np 2 | from multiprocessing import Pool 3 | from Join_scheme.factor import Factor 4 | from Sampling.get_query_binned_cards import get_binned_sqls, exec_sql 5 | 6 | 7 | def sample_on_the_fly(sql, table_buckets, tables_alias, ground_truth_factors_no_filter, sampling_percentage, 8 | equivalent_keys, db_conn_kwargs): 9 | alltabs, allcols, allsqls = get_binned_sqls(sql, equivalent_keys, sampling_percentage) 10 | par_args = [] 11 | for sql in allsqls: 12 | par_args.append((sql, db_conn_kwargs)) 13 | 14 | with Pool(processes=8) as pool: 15 | res = pool.starmap(exec_sql, par_args) 16 | 17 | conditional_factors = dict() 18 | table_pdfs = dict() 19 | filter_size = dict() 20 | 21 | for i, alias in enumerate(alltabs): 22 | cards = res[i][0] 23 | if cards is None: 24 | continue 25 | column = allcols[i] 26 | alias = alias[0] 27 | key = tables_alias[alias] + "." + column 28 | n_bins = table_buckets[tables_alias[alias]].bin_sizes[key] 29 | pdfs = np.zeros(n_bins) 30 | for (j, val) in cards: 31 | if j is None: 32 | j = 0 33 | pdfs[j] += val 34 | table_len = np.sum(pdfs) 35 | if table_len == 0: 36 | # no sample satisfy the filter, set it with a small value 37 | table_len = 1 38 | pdfs = ground_truth_factors_no_filter[tables_alias[alias]].pdfs[key] 39 | else: 40 | pdfs /= table_len 41 | if alias not in table_pdfs: 42 | table_pdfs[alias] = dict() 43 | filter_size[alias] = table_len 44 | table_pdfs[alias][key] = pdfs 45 | 46 | for alias in tables_alias: 47 | if alias in table_pdfs: 48 | table_len = min(ground_truth_factors_no_filter[tables_alias[alias]].table_len, 49 | filter_size[alias]/(sampling_percentage/100)) 50 | na_values = ground_truth_factors_no_filter[tables_alias[alias]].na_values 51 | conditional_factors[alias] = Factor(tables_alias[alias], table_len, list(table_pdfs[alias].keys()), 52 | table_pdfs[alias], na_values=na_values) 53 | else: 54 | #ground-truth distribution 55 | 
def load_sql_rep(fn, dummy=None):
    """
    Load a pickled query representation and rebuild its graphs.

    The pickle is expected to hold ``"subset_graph"`` and ``"join_graph"`` (and
    optionally ``"subset_graph_paths"``) in networkx adjacency-data form; they are
    converted back to graph objects in the returned dict.
    NOTE(review): `pickle.load` executes arbitrary code from the file; only load
    trusted files.

    Parameters
    ----------
    fn: str
        Path of the file; must contain ".pkl".
    dummy: unused.

    Returns
    -------
    dict: the query representation with graph objects.

    Raises
    ------
    SystemExit: with code -1 when the file cannot be opened or unpickled (a
        message is printed first; the original error is attached as the cause).
    """
    assert ".pkl" in fn
    try:
        with open(fn, "rb") as f:
            query = pickle.load(f)
    except Exception as err:
        print(fn + " failed to load...")
        # Same exit status as before, without relying on the site-provided
        # `exit` helper and without discarding the underlying error.
        raise SystemExit(-1) from err

    # networkx 3.x no longer provides OrderedDiGraph; DiGraph keeps insertion
    # order there, so fall back to it when the ordered class is missing.
    ordered_digraph = getattr(nx, "OrderedDiGraph", nx.DiGraph)

    query["subset_graph"] = \
        ordered_digraph(json_graph.adjacency_graph(query["subset_graph"]))
    query["join_graph"] = json_graph.adjacency_graph(query["join_graph"])
    if "subset_graph_paths" in query:
        query["subset_graph_paths"] = \
            ordered_digraph(json_graph.adjacency_graph(query["subset_graph_paths"]))

    return query


def save_sql_rep(fn, cur_qrep):
    """
    Pickle a query representation with its graphs in adjacency-data form.

    A deep copy of `cur_qrep` is serialised, so the caller's graphs are left
    untouched. ``"subset_graph_paths"`` is converted too when it is present as a
    graph, which keeps the file readable by `load_sql_rep`.

    Parameters
    ----------
    fn: str
        Destination path; must contain ".pkl".
    cur_qrep: dict
        Query representation holding ``"join_graph"`` and ``"subset_graph"``
        as networkx graphs.
    """
    assert ".pkl" in fn
    qrep = copy.deepcopy(cur_qrep)
    qrep["join_graph"] = nx.adjacency_data(qrep["join_graph"])
    qrep["subset_graph"] = nx.adjacency_data(qrep["subset_graph"])
    if isinstance(qrep.get("subset_graph_paths"), nx.Graph):
        qrep["subset_graph_paths"] = nx.adjacency_data(qrep["subset_graph_paths"])

    with open(fn, "wb") as f:
        pickle.dump(qrep, f)
/Schemas/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/Schemas/__init__.py -------------------------------------------------------------------------------- /Schemas/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/Schemas/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /Schemas/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/Schemas/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /Schemas/__pycache__/graph_representation.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/Schemas/__pycache__/graph_representation.cpython-37.pyc -------------------------------------------------------------------------------- /Schemas/__pycache__/graph_representation.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/Schemas/__pycache__/graph_representation.cpython-38.pyc -------------------------------------------------------------------------------- /Schemas/imdb/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/Schemas/imdb/__init__.py 
def gen_1gb_ssb_schema(csv_path):
    """
    SSB schema for SF=1.

    Registers the five SSB tables and the four foreign-key relationships of
    `lineorder` on a fresh `SchemaGraph`.

    Parameters
    ----------
    csv_path: str
        Format template with one placeholder; it is filled with the csv file
        stem of each table (``'lineorder_sampled'`` for the fact table).

    Returns
    -------
    SchemaGraph: the populated schema.
    """
    # (table name, csv stem, attributes, irrelevant attributes, table size, primary key)
    table_specs = [
        # lineorder
        ('lineorder', 'lineorder_sampled',
         ['lo_orderkey', 'lo_linenumber', 'lo_custkey', 'lo_partkey', 'lo_suppkey',
          'lo_orderdate', 'lo_orderpriority', 'lo_shippriority', 'lo_quantity',
          'lo_extendedprice', 'lo_ordertotalprice', 'lo_discount', 'lo_revenue',
          'lo_supplycost', 'lo_tax', 'lo_commitdate', 'lo_shipmode'],
         ['lo_commitdate'],
         6001171,
         []),
        # dwdate (registered under the table name 'date')
        # dwdate.d_dayofweek -> dwdate.d_daynuminweek
        # dwdate.d_dayofweek -> dwdate.d_lastdayinweekfl
        # dwdate.d_month -> dwdate.d_monthnuminyear
        # dwdate.d_monthnuminyear -> dwdate.d_sellingseason
        # dwdate.d_daynuminyear -> dwdate.d_weeknuminyear
        ('date', 'date',
         ['d_datekey', 'd_date', 'd_dayofweek', 'd_month', 'd_year', 'd_yearmonthnum', 'd_yearmonth',
          'd_daynuminweek', 'd_daynuminmonth', 'd_daynuminyear', 'd_monthnuminyear', 'd_weeknuminyear',
          'd_sellingseason', 'd_lastdayinweekfl', 'd_lastdayinmonthfl', 'd_holidayfl', 'd_weekdayfl'],
         ['d_date'],
         2556,
         ["d_datekey"]),
        # customer
        # customer.c_city -> customer.c_nation
        # customer.c_nation -> customer.c_region
        ('customer', 'customer',
         ['c_custkey', 'c_name', 'c_address', 'c_city', 'c_nation', 'c_region', 'c_phone',
          'c_mktsegment'],
         ['c_name', 'c_address', 'c_phone'],
         30000,
         ["c_custkey"]),
        # part
        # part.p_brand1 -> part.p_category
        # part.p_category -> part.p_mfgr
        ('part', 'part',
         ['p_partkey', 'p_name', 'p_mfgr', 'p_category', 'p_brand1', 'p_color', 'p_type', 'p_size',
          'p_container'],
         ['p_name'],
         200000,
         ["p_partkey"]),
        # supplier
        # supplier.s_city -> supplier.s_nation
        # supplier.s_nation -> supplier.s_region
        ('supplier', 'supplier',
         ['s_suppkey', 's_name', 's_address', 's_city', 's_nation', 's_region', 's_phone'],
         ['s_name', 's_address', 's_phone'],
         2000,
         ["s_suppkey"]),
    ]

    # (fact table, foreign key, dimension table, dimension primary key)
    relationship_specs = [
        ('lineorder', 'lo_custkey', 'customer', 'c_custkey'),
        ('lineorder', 'lo_partkey', 'part', 'p_partkey'),
        ('lineorder', 'lo_suppkey', 'supplier', 's_suppkey'),
        ('lineorder', 'lo_orderdate', 'date', 'd_datekey'),
    ]

    schema = SchemaGraph()

    # tables
    for name, csv_stem, attributes, irrelevant, size, primary_key in table_specs:
        table = Table(name,
                      attributes=attributes,
                      irrelevant_attributes=irrelevant,
                      csv_file_location=csv_path.format(csv_stem),
                      table_size=size,
                      primary_key=primary_key)
        schema.add_table(table)

    # relationships
    for relationship in relationship_specs:
        schema.add_relationship(*relationship)

    return schema
-------------------------------------------------------------------------------- /Schemas/stats/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/Schemas/stats/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /Schemas/stats/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/Schemas/stats/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /Schemas/stats/__pycache__/schema.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/Schemas/stats/__pycache__/schema.cpython-37.pyc -------------------------------------------------------------------------------- /Schemas/stats/__pycache__/schema.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/Schemas/stats/__pycache__/schema.cpython-38.pyc -------------------------------------------------------------------------------- /checkpoints/binned_cards_1.0/10a.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/checkpoints/binned_cards_1.0/10a.pkl -------------------------------------------------------------------------------- /checkpoints/binned_cards_1.0/10b.pkl: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/checkpoints/binned_cards_1.0/10b.pkl -------------------------------------------------------------------------------- /checkpoints/binned_cards_1.0/10c.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/checkpoints/binned_cards_1.0/10c.pkl -------------------------------------------------------------------------------- /checkpoints/binned_cards_1.0/11a.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/checkpoints/binned_cards_1.0/11a.pkl -------------------------------------------------------------------------------- /checkpoints/binned_cards_1.0/11b.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/checkpoints/binned_cards_1.0/11b.pkl -------------------------------------------------------------------------------- /checkpoints/binned_cards_1.0/11c.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/checkpoints/binned_cards_1.0/11c.pkl -------------------------------------------------------------------------------- /checkpoints/binned_cards_1.0/11d.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/checkpoints/binned_cards_1.0/11d.pkl -------------------------------------------------------------------------------- /checkpoints/binned_cards_1.0/12a.pkl: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/checkpoints/binned_cards_1.0/12a.pkl -------------------------------------------------------------------------------- /checkpoints/binned_cards_1.0/12b.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/checkpoints/binned_cards_1.0/12b.pkl -------------------------------------------------------------------------------- /checkpoints/binned_cards_1.0/12c.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/checkpoints/binned_cards_1.0/12c.pkl -------------------------------------------------------------------------------- /checkpoints/binned_cards_1.0/13a.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/checkpoints/binned_cards_1.0/13a.pkl -------------------------------------------------------------------------------- /checkpoints/binned_cards_1.0/13b.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/checkpoints/binned_cards_1.0/13b.pkl -------------------------------------------------------------------------------- /checkpoints/binned_cards_1.0/13c.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/checkpoints/binned_cards_1.0/13c.pkl -------------------------------------------------------------------------------- /checkpoints/binned_cards_1.0/13d.pkl: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/checkpoints/binned_cards_1.0/13d.pkl -------------------------------------------------------------------------------- /checkpoints/binned_cards_1.0/14a.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/checkpoints/binned_cards_1.0/14a.pkl -------------------------------------------------------------------------------- /checkpoints/binned_cards_1.0/14b.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/checkpoints/binned_cards_1.0/14b.pkl -------------------------------------------------------------------------------- /checkpoints/binned_cards_1.0/14c.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/checkpoints/binned_cards_1.0/14c.pkl -------------------------------------------------------------------------------- /checkpoints/binned_cards_1.0/15a.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/checkpoints/binned_cards_1.0/15a.pkl -------------------------------------------------------------------------------- /checkpoints/binned_cards_1.0/15b.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/checkpoints/binned_cards_1.0/15b.pkl -------------------------------------------------------------------------------- /checkpoints/binned_cards_1.0/15c.pkl: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/checkpoints/binned_cards_1.0/15c.pkl -------------------------------------------------------------------------------- /checkpoints/binned_cards_1.0/15d.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/checkpoints/binned_cards_1.0/15d.pkl -------------------------------------------------------------------------------- /checkpoints/binned_cards_1.0/16a.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/checkpoints/binned_cards_1.0/16a.pkl -------------------------------------------------------------------------------- /checkpoints/binned_cards_1.0/16b.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/checkpoints/binned_cards_1.0/16b.pkl -------------------------------------------------------------------------------- /checkpoints/binned_cards_1.0/16c.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/checkpoints/binned_cards_1.0/16c.pkl -------------------------------------------------------------------------------- /checkpoints/binned_cards_1.0/16d.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/checkpoints/binned_cards_1.0/16d.pkl -------------------------------------------------------------------------------- /checkpoints/binned_cards_1.0/17a.pkl: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/checkpoints/binned_cards_1.0/17a.pkl -------------------------------------------------------------------------------- /checkpoints/binned_cards_1.0/17b.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/checkpoints/binned_cards_1.0/17b.pkl -------------------------------------------------------------------------------- /checkpoints/binned_cards_1.0/17c.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/checkpoints/binned_cards_1.0/17c.pkl -------------------------------------------------------------------------------- /checkpoints/binned_cards_1.0/17d.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/checkpoints/binned_cards_1.0/17d.pkl -------------------------------------------------------------------------------- /checkpoints/binned_cards_1.0/17e.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/checkpoints/binned_cards_1.0/17e.pkl -------------------------------------------------------------------------------- /checkpoints/binned_cards_1.0/17f.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/checkpoints/binned_cards_1.0/17f.pkl -------------------------------------------------------------------------------- /checkpoints/binned_cards_1.0/18a.pkl: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/checkpoints/binned_cards_1.0/18a.pkl -------------------------------------------------------------------------------- /checkpoints/binned_cards_1.0/18b.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/checkpoints/binned_cards_1.0/18b.pkl -------------------------------------------------------------------------------- /checkpoints/binned_cards_1.0/18c.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/checkpoints/binned_cards_1.0/18c.pkl -------------------------------------------------------------------------------- /checkpoints/binned_cards_1.0/19a.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/checkpoints/binned_cards_1.0/19a.pkl -------------------------------------------------------------------------------- /checkpoints/binned_cards_1.0/19b.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/checkpoints/binned_cards_1.0/19b.pkl -------------------------------------------------------------------------------- /checkpoints/binned_cards_1.0/19c.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/checkpoints/binned_cards_1.0/19c.pkl -------------------------------------------------------------------------------- /checkpoints/binned_cards_1.0/19d.pkl: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/checkpoints/binned_cards_1.0/19d.pkl -------------------------------------------------------------------------------- /checkpoints/binned_cards_1.0/1a.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/checkpoints/binned_cards_1.0/1a.pkl -------------------------------------------------------------------------------- /checkpoints/binned_cards_1.0/1b.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/checkpoints/binned_cards_1.0/1b.pkl -------------------------------------------------------------------------------- /checkpoints/binned_cards_1.0/1c.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/checkpoints/binned_cards_1.0/1c.pkl -------------------------------------------------------------------------------- /checkpoints/binned_cards_1.0/1d.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/checkpoints/binned_cards_1.0/1d.pkl -------------------------------------------------------------------------------- /checkpoints/binned_cards_1.0/20a.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/checkpoints/binned_cards_1.0/20a.pkl -------------------------------------------------------------------------------- /checkpoints/binned_cards_1.0/20b.pkl: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/checkpoints/binned_cards_1.0/20b.pkl -------------------------------------------------------------------------------- /checkpoints/binned_cards_1.0/20c.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/checkpoints/binned_cards_1.0/20c.pkl -------------------------------------------------------------------------------- /checkpoints/binned_cards_1.0/21a.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/checkpoints/binned_cards_1.0/21a.pkl -------------------------------------------------------------------------------- /checkpoints/binned_cards_1.0/21b.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/checkpoints/binned_cards_1.0/21b.pkl -------------------------------------------------------------------------------- /checkpoints/binned_cards_1.0/21c.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/checkpoints/binned_cards_1.0/21c.pkl -------------------------------------------------------------------------------- /checkpoints/binned_cards_1.0/22a.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/checkpoints/binned_cards_1.0/22a.pkl -------------------------------------------------------------------------------- /checkpoints/binned_cards_1.0/22b.pkl: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/checkpoints/binned_cards_1.0/22b.pkl -------------------------------------------------------------------------------- /checkpoints/binned_cards_1.0/22c.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/checkpoints/binned_cards_1.0/22c.pkl -------------------------------------------------------------------------------- /checkpoints/binned_cards_1.0/22d.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/checkpoints/binned_cards_1.0/22d.pkl -------------------------------------------------------------------------------- /checkpoints/binned_cards_1.0/23a.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/checkpoints/binned_cards_1.0/23a.pkl -------------------------------------------------------------------------------- /checkpoints/binned_cards_1.0/23b.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/checkpoints/binned_cards_1.0/23b.pkl -------------------------------------------------------------------------------- /checkpoints/binned_cards_1.0/23c.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/checkpoints/binned_cards_1.0/23c.pkl -------------------------------------------------------------------------------- /checkpoints/binned_cards_1.0/24a.pkl: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/checkpoints/binned_cards_1.0/24a.pkl -------------------------------------------------------------------------------- /checkpoints/binned_cards_1.0/24b.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/checkpoints/binned_cards_1.0/24b.pkl -------------------------------------------------------------------------------- /checkpoints/binned_cards_1.0/25a.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/checkpoints/binned_cards_1.0/25a.pkl -------------------------------------------------------------------------------- /checkpoints/binned_cards_1.0/25b.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/checkpoints/binned_cards_1.0/25b.pkl -------------------------------------------------------------------------------- /checkpoints/binned_cards_1.0/25c.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/checkpoints/binned_cards_1.0/25c.pkl -------------------------------------------------------------------------------- /checkpoints/binned_cards_1.0/26a.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/checkpoints/binned_cards_1.0/26a.pkl -------------------------------------------------------------------------------- /checkpoints/binned_cards_1.0/26b.pkl: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/checkpoints/binned_cards_1.0/26b.pkl -------------------------------------------------------------------------------- /checkpoints/binned_cards_1.0/26c.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/checkpoints/binned_cards_1.0/26c.pkl -------------------------------------------------------------------------------- /checkpoints/binned_cards_1.0/27a.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/checkpoints/binned_cards_1.0/27a.pkl -------------------------------------------------------------------------------- /checkpoints/binned_cards_1.0/27b.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/checkpoints/binned_cards_1.0/27b.pkl -------------------------------------------------------------------------------- /checkpoints/binned_cards_1.0/27c.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/checkpoints/binned_cards_1.0/27c.pkl -------------------------------------------------------------------------------- /checkpoints/binned_cards_1.0/28a.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/checkpoints/binned_cards_1.0/28a.pkl -------------------------------------------------------------------------------- /checkpoints/binned_cards_1.0/28b.pkl: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/checkpoints/binned_cards_1.0/28b.pkl -------------------------------------------------------------------------------- /checkpoints/binned_cards_1.0/28c.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/checkpoints/binned_cards_1.0/28c.pkl -------------------------------------------------------------------------------- /checkpoints/binned_cards_1.0/29a.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/checkpoints/binned_cards_1.0/29a.pkl -------------------------------------------------------------------------------- /checkpoints/binned_cards_1.0/29b.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/checkpoints/binned_cards_1.0/29b.pkl -------------------------------------------------------------------------------- /checkpoints/binned_cards_1.0/29c.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/checkpoints/binned_cards_1.0/29c.pkl -------------------------------------------------------------------------------- /checkpoints/binned_cards_1.0/2a.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/checkpoints/binned_cards_1.0/2a.pkl -------------------------------------------------------------------------------- /checkpoints/binned_cards_1.0/2b.pkl: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/checkpoints/binned_cards_1.0/2b.pkl -------------------------------------------------------------------------------- /checkpoints/binned_cards_1.0/2c.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/checkpoints/binned_cards_1.0/2c.pkl -------------------------------------------------------------------------------- /checkpoints/binned_cards_1.0/2d.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/checkpoints/binned_cards_1.0/2d.pkl -------------------------------------------------------------------------------- /checkpoints/binned_cards_1.0/30a.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/checkpoints/binned_cards_1.0/30a.pkl -------------------------------------------------------------------------------- /checkpoints/binned_cards_1.0/30b.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/checkpoints/binned_cards_1.0/30b.pkl -------------------------------------------------------------------------------- /checkpoints/binned_cards_1.0/30c.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/checkpoints/binned_cards_1.0/30c.pkl -------------------------------------------------------------------------------- /checkpoints/binned_cards_1.0/31a.pkl: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/checkpoints/binned_cards_1.0/31a.pkl -------------------------------------------------------------------------------- /checkpoints/binned_cards_1.0/31b.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/checkpoints/binned_cards_1.0/31b.pkl -------------------------------------------------------------------------------- /checkpoints/binned_cards_1.0/31c.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/checkpoints/binned_cards_1.0/31c.pkl -------------------------------------------------------------------------------- /checkpoints/binned_cards_1.0/32a.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/checkpoints/binned_cards_1.0/32a.pkl -------------------------------------------------------------------------------- /checkpoints/binned_cards_1.0/32b.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/checkpoints/binned_cards_1.0/32b.pkl -------------------------------------------------------------------------------- /checkpoints/binned_cards_1.0/33a.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/checkpoints/binned_cards_1.0/33a.pkl -------------------------------------------------------------------------------- /checkpoints/binned_cards_1.0/33b.pkl: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/checkpoints/binned_cards_1.0/33b.pkl -------------------------------------------------------------------------------- /checkpoints/binned_cards_1.0/33c.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/checkpoints/binned_cards_1.0/33c.pkl -------------------------------------------------------------------------------- /checkpoints/binned_cards_1.0/3a.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/checkpoints/binned_cards_1.0/3a.pkl -------------------------------------------------------------------------------- /checkpoints/binned_cards_1.0/3b.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/checkpoints/binned_cards_1.0/3b.pkl -------------------------------------------------------------------------------- /checkpoints/binned_cards_1.0/3c.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/checkpoints/binned_cards_1.0/3c.pkl -------------------------------------------------------------------------------- /checkpoints/binned_cards_1.0/4a.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/checkpoints/binned_cards_1.0/4a.pkl -------------------------------------------------------------------------------- /checkpoints/binned_cards_1.0/4b.pkl: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/checkpoints/binned_cards_1.0/4b.pkl -------------------------------------------------------------------------------- /checkpoints/binned_cards_1.0/4c.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/checkpoints/binned_cards_1.0/4c.pkl -------------------------------------------------------------------------------- /checkpoints/binned_cards_1.0/5a.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/checkpoints/binned_cards_1.0/5a.pkl -------------------------------------------------------------------------------- /checkpoints/binned_cards_1.0/5b.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/checkpoints/binned_cards_1.0/5b.pkl -------------------------------------------------------------------------------- /checkpoints/binned_cards_1.0/5c.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/checkpoints/binned_cards_1.0/5c.pkl -------------------------------------------------------------------------------- /checkpoints/binned_cards_1.0/6a.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/checkpoints/binned_cards_1.0/6a.pkl -------------------------------------------------------------------------------- /checkpoints/binned_cards_1.0/6b.pkl: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/checkpoints/binned_cards_1.0/6b.pkl -------------------------------------------------------------------------------- /checkpoints/binned_cards_1.0/6c.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/checkpoints/binned_cards_1.0/6c.pkl -------------------------------------------------------------------------------- /checkpoints/binned_cards_1.0/6d.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/checkpoints/binned_cards_1.0/6d.pkl -------------------------------------------------------------------------------- /checkpoints/binned_cards_1.0/6e.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/checkpoints/binned_cards_1.0/6e.pkl -------------------------------------------------------------------------------- /checkpoints/binned_cards_1.0/6f.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/checkpoints/binned_cards_1.0/6f.pkl -------------------------------------------------------------------------------- /checkpoints/binned_cards_1.0/7a.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/checkpoints/binned_cards_1.0/7a.pkl -------------------------------------------------------------------------------- /checkpoints/binned_cards_1.0/7b.pkl: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/checkpoints/binned_cards_1.0/7b.pkl -------------------------------------------------------------------------------- /checkpoints/binned_cards_1.0/7c.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/checkpoints/binned_cards_1.0/7c.pkl -------------------------------------------------------------------------------- /checkpoints/binned_cards_1.0/8a.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/checkpoints/binned_cards_1.0/8a.pkl -------------------------------------------------------------------------------- /checkpoints/binned_cards_1.0/8b.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/checkpoints/binned_cards_1.0/8b.pkl -------------------------------------------------------------------------------- /checkpoints/binned_cards_1.0/8c.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/checkpoints/binned_cards_1.0/8c.pkl -------------------------------------------------------------------------------- /checkpoints/binned_cards_1.0/8d.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/checkpoints/binned_cards_1.0/8d.pkl -------------------------------------------------------------------------------- /checkpoints/binned_cards_1.0/9a.pkl: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/checkpoints/binned_cards_1.0/9a.pkl -------------------------------------------------------------------------------- /checkpoints/binned_cards_1.0/9b.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/checkpoints/binned_cards_1.0/9b.pkl -------------------------------------------------------------------------------- /checkpoints/binned_cards_1.0/9c.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/checkpoints/binned_cards_1.0/9c.pkl -------------------------------------------------------------------------------- /checkpoints/binned_cards_1.0/9d.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/checkpoints/binned_cards_1.0/9d.pkl -------------------------------------------------------------------------------- /checkpoints/derived_query_file.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/checkpoints/derived_query_file.pkl -------------------------------------------------------------------------------- /checkpoints/gt_no_filter.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuziniu/FactorJoin/b1ba71475c5e505dc2ca80681a529a539f2eda9f/checkpoints/gt_no_filter.pkl -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | numba 3 | pandas 4 | scipy 5 | Pgmpy 6 | 
import psycopg2
import time
import os
import argparse
import numpy as np


def _parse_time_ms(rows, label, fallback_index):
    """Extract a '<label>: <value> ms' timing (in ms) from EXPLAIN ANALYZE rows.

    Args:
        rows: result of ``cursor.fetchall()``; each row's first column is one
            line of the textual plan.
        label: line prefix to look for, e.g. ``"Planning Time"`` (matched
            case-insensitively).
        fallback_index: row index to parse when no row starts with ``label``.

    Returns:
        The timing as a float, in milliseconds.

    Raises:
        ValueError: if the selected line does not contain a parsable number.
    """
    wanted = label.lower()
    # Search from the end: the timing summary is printed after the plan tree.
    for row in reversed(rows):
        text = row[0].strip()
        if text.lower().startswith(wanted):
            break
    else:
        # Label not found: fall back to the fixed position.
        text = rows[fallback_index][0]
    return float(text.split(":")[-1].split("ms")[0].strip())


def send_query(dataset, method_name, query_file, save_folder, iteration=None):
    """Run every query of a file under EXPLAIN ANALYZE and save the timings.

    Each non-blank line of ``query_file`` is one query. If a line contains
    ``"||"``, only the part after the last ``"||"`` is executed. The reported
    planning and execution times are collected and written with ``np.save`` to
    ``plan_time_<name>[_iter<iteration>]`` and
    ``exec_time_<name>[_iter<iteration>]`` inside ``save_folder``, where
    ``<name>`` is ``method_name`` up to its first ``".txt"``.

    Args:
        dataset: name of the database to connect to (local server, user
            ``postgres``).
        method_name: value assigned to the ``ml_joinest_fname`` setting; also
            used to build the output file names.
            NOTE(review): presumably the file of injected cardinality
            estimates read by a patched PostgreSQL -- confirm.
        query_file: path of the text file holding one query per line.
        save_folder: directory receiving the two ``.npy`` files.
        iteration: optional run index appended to the output file names.

    Raises:
        OSError: if ``query_file`` cannot be read.
        psycopg2.Error: on connection or query failure.
        ValueError: if a timing line cannot be parsed.
    """
    # Read the workload before connecting so a bad path cannot leak a connection.
    with open(query_file, "r") as f:
        # Blank lines would be sent as an empty EXPLAIN ANALYZE and fail.
        queries = [line for line in f if line.strip()]

    planning_time = []
    execution_time = []

    conn = psycopg2.connect(database=dataset, user="postgres", password="postgres", host="127.0.0.1", port=5432)
    try:
        cursor = conn.cursor()
        try:
            # cursor.execute('SET debug_card_est=true')
            # cursor.execute('SET print_sub_queries=true')
            # cursor.execute('SET print_single_tbl_queries=true')
            # NOTE(review): these look like custom settings of a modified
            # PostgreSQL build -- confirm against the server in use.
            cursor.execute("SET ml_joinest_enabled=true;")
            cursor.execute("SET join_est_no=0;")
            # Let psycopg2 quote the value instead of building the SQL by hand.
            cursor.execute("SET ml_joinest_fname=%s;", (method_name,))

            for no, query in enumerate(queries):
                if "||" in query:
                    query = query.split("||")[-1]
                print(f"Executing query {no}")
                start = time.time()
                cursor.execute("EXPLAIN ANALYZE " + query)
                res = cursor.fetchall()
                # Locate the timing rows by label; extra rows (e.g. a JIT
                # section) may appear between the two timing lines.
                planning_time.append(_parse_time_ms(res, "Planning Time", -2))
                execution_time.append(_parse_time_ms(res, "Execution Time", -1))
                end = time.time()
                print(f"{no}-th query finished in {end-start}, with planning_time {planning_time[-1]} ms and execution_time {execution_time[-1]} ms" )
        finally:
            cursor.close()
    finally:
        conn.close()

    save_file_name = method_name.split(".txt")[0]
    # "is not None" so that an explicit iteration index of 0 is still recorded.
    suffix = "" if iteration is None else f"_iter{iteration}"
    # os.path.join works whether or not save_folder ends with a separator.
    np.save(os.path.join(save_folder, f"plan_time_{save_file_name}{suffix}"), np.asarray(planning_time))
    np.save(os.path.join(save_folder, f"exec_time_{save_file_name}{suffix}"), np.asarray(execution_time))


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--dataset', default='stats', help='Which dataset to be used')
    parser.add_argument('--method_name', default='stats_CEB_sub_queries_model_stats_greedy_50.txt', help='save estimates')
    parser.add_argument('--query_file', default='/home/ubuntu/data_CE/stats_CEB/stats_CEB.sql', help='Query file location')
    # Parsed for command-line compatibility; not used by this script.
    parser.add_argument('--with_true_card', action='store_true', help='Is true cardinality included in the query?')
    parser.add_argument('--save_folder', default='/home/ubuntu/data_CE/stats_CEB/', help='Folder where the timing arrays are saved')
    parser.add_argument('--iteration', type=int, default=None, help='Number of iteration to read')

    args = parser.parse_args()

    if args.iteration:
        # Repeat the whole workload; runs are numbered from 1.
        for i in range(args.iteration):
            send_query(args.dataset, args.method_name, args.query_file, args.save_folder, i+1)
    else:
        send_query(args.dataset, args.method_name, args.query_file, args.save_folder)