├── .gitattributes
├── Makefile
├── README.md
├── coding_wheel.txt
├── codon_usage_freq_table_human.csv
├── codon_usage_freq_table_yeast.csv
├── gflags.py
├── license.txt
├── lineardesign
├── pic
│   └── baidu_research_logo.jpg
├── src
│   ├── Utils
│   │   ├── base.h
│   │   ├── codon.h
│   │   ├── common.h
│   │   ├── constants.h
│   │   ├── flat.h
│   │   ├── libraries
│   │   │   ├── LinearDesign_Mac_M1.so
│   │   │   ├── LinearDesign_Mac_x86.so
│   │   │   ├── LinearDesign_linux64.so
│   │   │   └── LinearDesign_linux64_old.so
│   │   ├── network.h
│   │   ├── reader.h
│   │   └── utility_v.h
│   ├── backtrace_iter.cc
│   ├── beam_cky_parser.cc
│   ├── beam_cky_parser.h
│   └── linear_design.cpp
└── testseq
/.gitattributes:
--------------------------------------------------------------------------------
1 | *.cpp linguist-language=c++
2 | *.h linguist-language=c++
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | # Build settings: CXX compiles on Linux, CLA on other systems; DEPS lists every source the build needs, main translation unit first.
2 | CLA=clang++
3 | CXX=g++
4 | CXXFLAGS=-std=c++11 -Ofast -DFINAL_CHECK -DSPECIAL_HP -fpermissive
5 | DEPS=src/linear_design.cpp src/beam_cky_parser.cc src/beam_cky_parser.h src/backtrace_iter.cc src/Utils/reader.h src/Utils/network.h src/Utils/codon.h src/Utils/utility_v.h src/Utils/common.h src/Utils/base.h
6 | BIN=bin/LinearDesign_2D
7 | UNAME_S := $(shell uname -s)
8 | UNAME_M := $(shell uname -m)
9 | # Default goal: compile src/linear_design.cpp into $(BIN), linking the prebuilt LinearDesign library that matches this OS/architecture.
10 | lineardesign_2D: $(DEPS)
11 | 	@echo "Compiling" $@ "from" src/linear_design.cpp "..."
12 | 	chmod +x lineardesign
13 | 	mkdir -p $(dir $(BIN))
14 | # No LD_LIBRARY_PATH export here: each recipe line runs in its own shell, so an export could not reach the compile commands below.
15 | # Every final failure branch exits non-zero so that make (and any caller) sees the build as failed.
16 | ifeq ($(UNAME_S), Linux)
17 | 	if $(CXX) $(CXXFLAGS) src/linear_design.cpp -o $(BIN) src/Utils/libraries/LinearDesign_linux64.so; then \
18 | 		echo "Linux system; compiled with g++; finished."; \
19 | 		echo "Compilation Succeed!"; \
20 | 	else \
21 | 		echo "Try another .so file."; \
22 | 		if $(CXX) $(CXXFLAGS) src/linear_design.cpp -o $(BIN) src/Utils/libraries/LinearDesign_linux64_old.so; then \
23 | 			echo "Linux system; compiled with g++; finished."; \
24 | 			echo "Compilation Succeed!"; \
25 | 		else \
26 | 			echo "Compilation failed! Make sure it is either Linux-64 or Mac."; exit 1; \
27 | 		fi; \
28 | 	fi
29 | else
30 | 	if [ "$(UNAME_M)" = "arm64" ]; then \
31 | 		if $(CLA) $(CXXFLAGS) src/linear_design.cpp -o $(BIN) src/Utils/libraries/LinearDesign_Mac_M1.so; then \
32 | 			echo "Mac M1 system; compiled with clang++; finished."; \
33 | 			echo "Compilation Succeed!"; \
34 | 			echo "You may encounter a pop-up message at the first run. If so, please go to System Preferences -> Security & Privacy -> General to allow LinearDesign_Mac_M1.so to open. See README.md for details."; \
35 | 		else \
36 | 			echo "Compilation failed! Make sure it is either Linux-64 or Mac."; exit 1; \
37 | 		fi; \
38 | 	else \
39 | 		if $(CLA) $(CXXFLAGS) src/linear_design.cpp -o $(BIN) src/Utils/libraries/LinearDesign_Mac_x86.so; then \
40 | 			echo "Mac x86_64 system; compiled with clang++; finished."; \
41 | 			echo "Compilation Succeed!"; \
42 | 			echo "You may encounter a pop-up message at the first run. If so, please go to System Preferences -> Security & Privacy -> General to allow LinearDesign_Mac_x86.so to open. See README.md for details."; \
43 | 		else \
44 | 			echo "Compilation failed! Make sure it is either Linux-64 or Mac."; exit 1; \
45 | 		fi; \
46 | 	fi
47 | endif
48 |
49 |
50 | .PHONY: clean
51 | # Remove the compiled binary named by $(BIN).
52 | clean:
53 | 	$(RM) $(BIN)
54 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 | # Algorithm for Optimized mRNA Design Improves Stability and Immunogenicity (LinearDesign)
4 | 
5 |
6 |
7 | This repository contains the source code for the LinearDesign project.
8 |
9 | He Zhang†, Liang Zhang†, Ang Lin†, Congcong Xu†, Ziyu Li, Kaibo Liu, Boxiang Liu, Xiaopin Ma, Fanfan Zhao, Huiling Jiang, Chunxiu Chen, Haifa Shen, Hangwen Li*, David H. Mathews*, Yujian Zhang*, Liang Huang†*#. Algorithm for Optimized mRNA Design Improves Stability and Immunogenicity. Nature [https://doi.org/10.1038/s41586-023-06127-z](https://doi.org/10.1038/s41586-023-06127-z) (2023)
10 |
11 | † contributed equally,
12 | \* corresponding authors,
13 | # lead corresponding author
14 |
15 | For questions, please contact the lead corresponding author.
16 |
17 | ## Dependencies
18 | Clang 11.0.0 (or above) or GCC 4.8.5 (or above)
19 |
20 | Python 2.7
21 |
22 | ## To Compile
23 | ```
24 | make
25 | ```
26 |
27 | ## To Run
28 | The LinearDesign program can be run with:
29 | ```
30 | echo SEQUENCE | ./lineardesign [OPTIONS]
31 |
32 | OR
33 |
34 | cat FASTA_FILE | ./lineardesign [OPTIONS]
35 | ```
36 |
37 | OPTIONS:
38 | ```
39 | --lambda LAMBDA or -l LAMBDA
40 | ```
41 | Set LAMBDA, a hyperparameter balancing MFE and CAI. (default 0.0)
42 | ```
43 | --codonusage FILE_NAME or -c FILE_NAME
44 | ```
45 | Import a Codon Usage Frequency Table. See "codon_usage_freq_table_human.csv" for the format.
46 | (default: using human codon usage frequency table)
47 | ```
48 | --verbose or -v
49 | ```
50 | Print out more details. (default False)
51 |
52 | On macOS, users may encounter a pop-up message on the first run.
53 | For Mac-M1 system, the message is:
54 | ```
55 | "LinearDesign_Mac_M1.so" can't be opened because Apple cannot check it for malicious software.
56 | ```
57 | For Mac-Intel system, the message is:
58 | ```
59 | "LinearDesign_Mac_x86.so" cannot be opened because it is from an unidentified developer.
60 | ```
61 | If so, please go to "System Preferences -> Security & Privacy -> General" to allow LinearDesign_Mac_M1.so (or LinearDesign_Mac_x86.so) to open.
62 |
63 | ## Example: Single Sequence Design
64 | ```
65 | echo MNDTEAI | ./lineardesign
66 | mRNA sequence: AUGAACGAUACGGAGGCGAUC
67 | mRNA structure: ......(((.((....)))))
68 | mRNA folding free energy: -1.10 kcal/mol; mRNA CAI: 0.695
69 | ```
70 |
71 | ## Example: Multiple Sequences Design with Option --lambda (-l)
72 | ```
73 | cat testseq | ./lineardesign --lambda 3
74 | >seq1
75 | mRNA sequence: AUGCCAAACACCCUGGCAUGCCCC
76 | mRNA structure: ((((((.......)))))).....
77 | mRNA folding free energy: -6.00 kcal/mol; mRNA CAI: 0.910
78 |
79 | >seq2
80 | mRNA sequence: AUGCUGGAUCAGGUGAACAAGCUGAAGUACCCAGAGGUGAGCCUGACCUGA
81 | mRNA structure: .....((.((((((..((...(((.......)))..))..))))))))...
82 | mRNA folding free energy: -13.50 kcal/mol; mRNA CAI: 0.979
83 | ```
84 |
85 | ## Example: Option --codonusage (-c)
86 | ```
87 | echo MNDTEAI | ./lineardesign -l 0.3 --codonusage codon_usage_freq_table_yeast.csv
88 | mRNA sequence: AUGAAUGAUACGGAAGCGAUC
89 | mRNA structure: ......(((.((....)))))
90 | mRNA folding free energy: -1.10 kcal/mol; mRNA CAI: 0.670
91 | ```
92 |
93 | ## Example: Option --verbose (-v)
94 | ```
95 | echo MNDTEAI | ./lineardesign --verbose
96 | Input protein: MNDTEAI
97 | Using lambda = 0; Using codon frequency table = codon_usage_freq_table_human.csv
98 | mRNA sequence: AUGAACGAUACGGAGGCGAUC
99 | mRNA structure: ......(((.((....)))))
100 | mRNA folding free energy: -1.10 kcal/mol; mRNA CAI: 0.695
101 | Runtime: 0.002 seconds
102 | ```
103 |
104 |
105 | ## Declarations
106 | Baidu Research has filed a patent for the LinearDesign algorithm that lists He Zhang, Liang Zhang, Ziyu Li, Kaibo Liu, Boxiang Liu, and Liang Huang as inventors.
107 |
--------------------------------------------------------------------------------
/coding_wheel.txt:
--------------------------------------------------------------------------------
1 | Phe U U CU
2 | Leu C U GCUA U U GA
3 | Ser U C GCUA A G CU
4 | Tyr U A CU
5 | STOP U A GA U G A
6 | Cys U G CU
7 | Trp U G G
8 | Pro C C GCUA
9 | His C A CU
10 | Gln C A GA
11 | Arg C G GCUA A G GA
12 | Ile A U CUA
13 | Met A U G
14 | Thr A C GCUA
15 | Asn A A CU
16 | Lys A A GA
17 | Val G U GCUA
18 | Asp G A CU
19 | Glu G A GA
20 | Gly G G GCUA
21 | Ala G C GCUA
--------------------------------------------------------------------------------
/codon_usage_freq_table_human.csv:
--------------------------------------------------------------------------------
1 | #,,
2 | UAA,*,0.28
3 | UAG,*,0.2
4 | UGA,*,0.52
5 | GCU,A,0.26
6 | GCC,A,0.4
7 | GCA,A,0.23
8 | GCG,A,0.11
9 | UGU,C,0.45
10 | UGC,C,0.55
11 | GAU,D,0.46
12 | GAC,D,0.54
13 | GAA,E,0.42
14 | GAG,E,0.58
15 | UUU,F,0.45
16 | UUC,F,0.55
17 | GGU,G,0.16
18 | GGC,G,0.34
19 | GGA,G,0.25
20 | GGG,G,0.25
21 | CAU,H,0.41
22 | CAC,H,0.59
23 | AUU,I,0.36
24 | AUC,I,0.48
25 | AUA,I,0.16
26 | AAA,K,0.42
27 | AAG,K,0.58
28 | UUA,L,0.07
29 | UUG,L,0.13
30 | CUU,L,0.13
31 | CUC,L,0.2
32 | CUA,L,0.07
33 | CUG,L,0.41
34 | AUG,M,1
35 | AAU,N,0.46
36 | AAC,N,0.54
37 | CCU,P,0.28
38 | CCC,P,0.33
39 | CCA,P,0.27
40 | CCG,P,0.11
41 | CAA,Q,0.25
42 | CAG,Q,0.75
43 | CGU,R,0.08
44 | CGC,R,0.19
45 | CGA,R,0.11
46 | CGG,R,0.21
47 | AGA,R,0.2
48 | AGG,R,0.2
49 | UCU,S,0.18
50 | UCC,S,0.22
51 | UCA,S,0.15
52 | UCG,S,0.06
53 | AGU,S,0.15
54 | AGC,S,0.24
55 | ACU,T,0.24
56 | ACC,T,0.36
57 | ACA,T,0.28
58 | ACG,T,0.12
59 | GUU,V,0.18
60 | GUC,V,0.24
61 | GUA,V,0.11
62 | GUG,V,0.47
63 | UGG,W,1
64 | UAU,Y,0.43
65 | UAC,Y,0.57
66 |
--------------------------------------------------------------------------------
/codon_usage_freq_table_yeast.csv:
--------------------------------------------------------------------------------
1 | #,,
2 | UAA,*,0.48
3 | UAG,*,0.24
4 | UGA,*,0.29
5 | GCU,A,0.38
6 | GCC,A,0.22
7 | GCA,A,0.29
8 | GCG,A,0.11
9 | UGU,C,0.63
10 | UGC,C,0.37
11 | GAU,D,0.65
12 | GAC,D,0.35
13 | GAA,E,0.71
14 | GAG,E,0.29
15 | UUU,F,0.59
16 | UUC,F,0.41
17 | GGU,G,0.47
18 | GGC,G,0.19
19 | GGA,G,0.22
20 | GGG,G,0.12
21 | CAU,H,0.64
22 | CAC,H,0.36
23 | AUU,I,0.46
24 | AUC,I,0.26
25 | AUA,I,0.27
26 | AAA,K,0.58
27 | AAG,K,0.42
28 | UUA,L,0.28
29 | UUG,L,0.29
30 | CUU,L,0.13
31 | CUC,L,0.06
32 | CUA,L,0.14
33 | CUG,L,0.11
34 | AUG,M,1
35 | AAU,N,0.59
36 | AAC,N,0.41
37 | CCU,P,0.31
38 | CCC,P,0.15
39 | CCA,P,0.41
40 | CCG,P,0.12
41 | CAA,Q,0.69
42 | CAG,Q,0.31
43 | CGU,R,0.15
44 | CGC,R,0.06
45 | CGA,R,0.07
46 | CGG,R,0.04
47 | AGA,R,0.48
48 | AGG,R,0.21
49 | UCU,S,0.26
50 | UCC,S,0.16
51 | UCA,S,0.21
52 | UCG,S,0.1
53 | AGU,S,0.16
54 | AGC,S,0.11
55 | ACU,T,0.35
56 | ACC,T,0.22
57 | ACA,T,0.3
58 | ACG,T,0.13
59 | GUU,V,0.39
60 | GUC,V,0.21
61 | GUA,V,0.21
62 | GUG,V,0.19
63 | UGG,W,1
64 | UAU,Y,0.56
65 | UAC,Y,0.44
66 |
--------------------------------------------------------------------------------
/license.txt:
--------------------------------------------------------------------------------
1 | The LinearDesign code is freely accessible to all interested parties.
2 | It is free for academic, non-profit, and research use, and can be licensed for commercial use.
3 |
4 | To use this software for the development of a commercial product, including but not limited to software, service, or pharmaceuticals, please contact the lead corresponding author.
5 |
6 | Redistribution of the code with or without modification is not permitted without explicit written permission by the lead corresponding author.
7 |
--------------------------------------------------------------------------------
/lineardesign:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python2
2 |
3 | import gflags as flags
4 | import subprocess
5 | import sys
6 | import os
7 |
8 | FLAGS = flags.FLAGS
9 |
10 | def setgflags():
11 | flags.DEFINE_float('lambda', 0.0, "set lambda", short_name='l')
12 | flags.DEFINE_boolean('verbose', False, "print out more details", short_name='v')
13 | flags.DEFINE_string('codonusage', 'codon_usage_freq_table_human.csv', "import a Codon Usage Frequency Table", short_name='c')
14 | argv = FLAGS(sys.argv)
15 |
16 | def main():
17 |
18 | lambda_ = str(FLAGS.l)
19 | verbose_ = '1' if FLAGS.verbose else '0'
20 | codon_usage = str(FLAGS.codonusage)
21 |
22 | path = os.path.dirname(os.path.abspath(__file__))
23 | cmd = ["%s/%s" % (path, ('bin/LinearDesign_2D')), lambda_, verbose_, codon_usage]
24 | subprocess.call(cmd, stdin=sys.stdin)
25 |
26 | if __name__ == '__main__':
27 | setgflags()
28 | main()
29 |
30 |
--------------------------------------------------------------------------------
/pic/baidu_research_logo.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LinearDesignSoftware/LinearDesign/f0126ca89a8b853088b4bccfd2cc8c378d3678be/pic/baidu_research_logo.jpg
--------------------------------------------------------------------------------
/src/Utils/base.h:
--------------------------------------------------------------------------------
1 | #ifndef base_h
2 | #define base_h
3 |
4 | #include
5 | #include
6 | #include
7 | #include
8 | #include
9 | #include
10 |
11 | #if defined(__GNUC__) || defined(__clang__)
12 | #define LINEAR_DESIGN_DEPRECATED __attribute__((deprecated))
13 | #elif defined(_MSC_VER)
14 | #define LINEAR_DESIGN_DEPRECATED __declspec(deprecated)
15 | #else
16 | #pragma message("WARNING: function deprecated")
17 | #define LINEAR_DESIGN_DEPRECATED
18 | #endif
19 |
20 | #if defined(__GNUC__) || defined(__clang__)
21 | #define LINEAR_DESIGN_INLINE inline __attribute__((always_inline))
22 | #else
23 | #define LINEAR_DESIGN_INLINE inline
24 | #endif
25 |
26 | #define LINEAR_DESIGN_CACHELINE 64
27 |
28 | template
29 | using enable_if_t = typename std::enable_if::type;
30 |
31 | template ::value, int> = 0>
32 | std::ostream& operator<< (std::ostream& out, const std::pair& rhs) {
33 | out << "(" << rhs.first << ", " << rhs.second << ")";
34 | return out;
35 | }
36 |
37 | template ::value, int> = 0>
38 | std::ostream& operator<< (std::ostream& out, const std::vector>& rhs) {
39 | out << "[";
40 | for (size_t i = 0; i < rhs.size(); ++i) {
41 | out << rhs[i];
42 | if (i < rhs.size() - 1) out << ",";
43 | }
44 | out << "]";
45 | return out;
46 | }
47 |
48 | namespace LinearDesign {
49 |
50 | namespace util {
51 | std::vector split(const std::string &s, char delim) { // Split s on delim and return the pieces in their original order.
52 | std::vector result; // NOTE(review): element type not visible in this listing (template arguments appear stripped) — presumably std::string; confirm
53 | std::stringstream ss(s);
54 | std::string item;
55 | while (getline(ss, item, delim)) // read one delim-terminated field per iteration until the stream is exhausted
56 | result.push_back(item);
57 | return result;
58 | }
59 |
60 | template
61 | constexpr T value_min() {
62 | static_assert(std::is_integral::value ||
63 | std::is_floating_point::value, "Int or float required.");
64 | return std::numeric_limits::lowest();
65 | }
66 |
67 | template
68 | constexpr T value_max() {
69 | static_assert(std::is_integral::value ||
70 | std::is_floating_point::value, "Int or float required.");
71 | return std::numeric_limits::max();
72 | }
73 | } /* util */
74 |
75 |
76 | // template struct is_any;
77 | // template <> struct is_any<> : std::false_type {};
78 | // template struct is_any {
79 | // constexpr static bool value = First || is_any::value;
80 | // };
81 |
82 | struct hash_pair_pair { // Hash functor for a pair whose first member is itself a pair.
83 | template
84 | size_t operator()(const std::pair, T3>& p) const {
85 | auto hash1 = std::hash{}(p.first.first);
86 | auto hash2 = std::hash{}(p.first.second);
87 | auto hash3 = std::hash{}(p.second);
88 | return hash1 ^ hash2 ^ hash3; // plain XOR of the three member hashes: order-insensitive, and two equal hashes cancel out
89 | }
90 | };
91 |
92 | struct hash_pair { // Hash functor for a std::pair, built from the hashes of its two members.
93 | template
94 | size_t operator()(const std::pair& p) const {
95 | auto hash1 = std::hash{}(p.first);
96 | auto hash2 = std::hash{}(p.second);
97 | return hash1 ^ hash2; // plain XOR: swapping members that hash alike gives the same result, and equal hashes yield 0
98 | }
99 | };
100 |
101 | }
102 |
103 |
104 | namespace Hash {
105 | template
106 | LINEAR_DESIGN_INLINE size_t hash_combine(size_t left_seed, const T& right) {
107 | return left_seed ^ (std::hash{}(right) << 1);
108 | }
109 |
110 | template ::value - 1>
111 | struct TupleHashImpl {
112 | static size_t impl(size_t seed, const Tuple& tuple) {
113 | size_t h = hash_combine(seed, std::get(tuple));
114 | return TupleHashImpl::impl(h, tuple);
115 | }
116 | };
117 |
118 | template
119 | struct TupleHashImpl {
120 | static size_t impl(size_t seed, const Tuple& tuple) {
121 | return hash_combine(seed, std::get<0>(tuple));
122 | }
123 | };
124 | }
125 |
126 |
127 | template
128 | struct std::hash> {
129 | size_t operator()(const std::tuple& ts) const {
130 | return Hash::TupleHashImpl>::impl(0, ts);
131 | }
132 | };
133 |
134 | template
135 | struct std::hash> {
136 | size_t operator()(const std::pair& p) const {
137 | size_t h = std::hash{}(p.first);
138 | return Hash::hash_combine(h, p.second);
139 | }
140 | };
141 |
142 |
143 | #endif
--------------------------------------------------------------------------------
/src/Utils/codon.h:
--------------------------------------------------------------------------------
1 | #ifndef codon_h
2 | #define codon_h
3 |
4 | #include
5 | #include
6 | #include
7 | #include
8 | #include