├── plugin ├── tandem_lib │ ├── __init__.py │ ├── agent │ │ ├── __init__.py │ │ ├── tandem │ │ │ ├── __init__.py │ │ │ ├── agent │ │ │ │ ├── __init__.py │ │ │ │ ├── io │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── std_streams.py │ │ │ │ │ ├── proxies │ │ │ │ │ │ └── relay.py │ │ │ │ │ └── document.py │ │ │ │ ├── models │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── connection_state.py │ │ │ │ │ └── connection.py │ │ │ │ ├── protocol │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── handlers │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ ├── rendezvous.py │ │ │ │ │ │ ├── editor.py │ │ │ │ │ │ └── interagent.py │ │ │ │ │ └── messages │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ ├── interagent.py │ │ │ │ │ │ └── editor.py │ │ │ │ ├── stores │ │ │ │ │ ├── __init__.py │ │ │ │ │ └── connection.py │ │ │ │ ├── utils │ │ │ │ │ ├── __init__.py │ │ │ │ │ └── hole_punching.py │ │ │ │ ├── executables │ │ │ │ │ ├── __init__.py │ │ │ │ │ └── agent.py │ │ │ │ └── configuration.py │ │ │ └── shared │ │ │ │ ├── __init__.py │ │ │ │ ├── io │ │ │ │ ├── __init__.py │ │ │ │ ├── proxies │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── base.py │ │ │ │ │ ├── list_parameters.py │ │ │ │ │ ├── unicode.py │ │ │ │ │ ├── fragment.py │ │ │ │ │ └── reliability.py │ │ │ │ ├── base.py │ │ │ │ └── udp_gateway.py │ │ │ │ ├── models │ │ │ │ ├── __init__.py │ │ │ │ ├── base.py │ │ │ │ ├── peer.py │ │ │ │ └── fragment.py │ │ │ │ ├── protocol │ │ │ │ ├── __init__.py │ │ │ │ ├── handlers │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── addressed.py │ │ │ │ │ ├── multi.py │ │ │ │ │ └── base.py │ │ │ │ └── messages │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── base.py │ │ │ │ │ └── rendezvous.py │ │ │ │ ├── stores │ │ │ │ ├── __init__.py │ │ │ │ ├── base.py │ │ │ │ ├── reliability.py │ │ │ │ └── fragment.py │ │ │ │ └── utils │ │ │ │ ├── __init__.py │ │ │ │ ├── proxy.py │ │ │ │ ├── static_value.py │ │ │ │ ├── relay.py │ │ │ │ ├── reliability.py │ │ │ │ ├── fragment.py │ │ │ │ └── time_scheduler.py │ │ ├── main.py │ │ └── test_client.py │ ├── tandem_plugin.py │ └── 
diff_match_patch.py └── tandem_vim.vim ├── README.md └── LICENSE.txt /plugin/tandem_lib/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /plugin/tandem_lib/agent/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /plugin/tandem_lib/agent/tandem/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /plugin/tandem_lib/agent/tandem/agent/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /plugin/tandem_lib/agent/tandem/agent/io/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /plugin/tandem_lib/agent/tandem/shared/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /plugin/tandem_lib/agent/tandem/shared/io/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /plugin/tandem_lib/agent/tandem/agent/models/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /plugin/tandem_lib/agent/tandem/agent/protocol/__init__.py: -------------------------------------------------------------------------------- 1 | 
-------------------------------------------------------------------------------- /plugin/tandem_lib/agent/tandem/agent/stores/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /plugin/tandem_lib/agent/tandem/agent/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /plugin/tandem_lib/agent/tandem/shared/models/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /plugin/tandem_lib/agent/tandem/shared/protocol/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /plugin/tandem_lib/agent/tandem/shared/stores/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /plugin/tandem_lib/agent/tandem/shared/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /plugin/tandem_lib/agent/tandem/agent/executables/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /plugin/tandem_lib/agent/tandem/shared/io/proxies/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /plugin/tandem_lib/agent/tandem/agent/protocol/handlers/__init__.py: 
# /plugin/tandem_lib/agent/tandem/agent/protocol/handlers/__init__.py: (empty)
# /plugin/tandem_lib/agent/tandem/agent/protocol/messages/__init__.py: (empty)
# /plugin/tandem_lib/agent/tandem/shared/protocol/handlers/__init__.py: (empty)
# /plugin/tandem_lib/agent/tandem/shared/protocol/messages/__init__.py: (empty)


# /plugin/tandem_lib/agent/tandem/shared/models/base.py:
class ModelBase(object):
    """Marker base class shared by all Tandem model objects."""
    pass


# /plugin/tandem_lib/agent/tandem/shared/utils/proxy.py:
class ProxyUtils(object):
    """Helpers for threading a value through a chain of proxy objects."""

    @staticmethod
    def run(proxies, method, data):
        """Pipe ``data`` through ``proxy.<method>`` for each proxy in order.

        A proxy that does not implement ``method`` passes the value through
        unchanged. Returns the final transformed value.
        """
        result = data
        for current in proxies:
            transform = getattr(current, method, None)
            if transform is None:
                # No hook on this proxy: leave the value untouched.
                continue
            result = transform(result)
        return result


# /plugin/tandem_lib/agent/tandem/agent/models/connection_state.py:
import enum


class ConnectionState(enum.Enum):
    """Lifecycle states of a connection to another agent."""
    PING = "ping"
    SEND_SYN = "syn"
    WAIT_FOR_SYN = "wait"
    OPEN = "open"
    RELAY = "relay"
    UNREACHABLE = "unreachable"
# /plugin/tandem_lib/agent/tandem/shared/protocol/handlers/addressed.py:
from tandem.shared.protocol.handlers.base import ProtocolHandlerBase


class AddressedHandler(ProtocolHandlerBase):
    """Protocol handler whose message handlers also receive the sender."""

    def _extra_handler_arguments(self, io_data):
        # Every registered handler is called with the peer's address as an
        # additional positional argument.
        return [io_data.get_address()]


# /plugin/tandem_lib/agent/tandem/shared/stores/base.py:
class StoreBase(object):
    """Lazily-created per-class singleton base for in-memory stores."""

    # Shared instance slot; each subclass gets its own on first access.
    instance = None

    @classmethod
    def get_instance(cls):
        """Return the shared store, creating it on first access."""
        if not cls.instance:
            cls.instance = cls()
        return cls.instance

    @classmethod
    def reset_instance(cls):
        """Discard the shared store so the next access builds a fresh one."""
        cls.instance = None


# /plugin/tandem_lib/agent/tandem/shared/io/proxies/base.py:
class ProxyBase(object):
    """No-op base for I/O proxies; subclasses override only the hooks they need."""

    def attach_interface(self, interface):
        # Back-reference to the owning interface, for proxies that need it.
        self._interface = interface

    def on_retrieve_io_data(self, params):
        """Hook run (in reverse proxy order) on incoming data; identity here."""
        return params

    def pre_generate_io_data(self, params):
        """Hook run before io_data construction; identity here."""
        return params

    def pre_write_io_data(self, params):
        """Hook run before data is written out; identity here."""
        return params


# /plugin/tandem_lib/agent/tandem/shared/utils/static_value.py:
def static_value(inner_function):
    """Decorator that computes ``inner_function`` once and caches the result.

    A one-slot dictionary stands in for ``nonlocal`` so the code also runs
    under Python 2. NOTE: a cached value of ``None`` is treated as "not yet
    computed" and triggers recomputation.
    """
    cache = {}

    def wrapper(*args, **kwargs):
        if cache.get('value') is None:
            cache['value'] = inner_function(*args, **kwargs)
        return cache['value']

    return wrapper
-------------------------------------------------------------------------------- /plugin/tandem_lib/agent/tandem/shared/protocol/handlers/multi.py: -------------------------------------------------------------------------------- 1 | from tandem.shared.protocol.handlers.base import ProtocolHandlerBase 2 | 3 | 4 | class MultiProtocolHandler(ProtocolHandlerBase): 5 | def __init__(self, *handlers): 6 | self._handlers = [handler for handler in handlers] 7 | 8 | def handle_message(self, data_as_dict, io_data): 9 | for handler in self._handlers: 10 | handled = handler.handle_message(data_as_dict, io_data) 11 | if handled: 12 | return True 13 | 14 | return False 15 | -------------------------------------------------------------------------------- /plugin/tandem_lib/agent/tandem/shared/stores/reliability.py: -------------------------------------------------------------------------------- 1 | from tandem.shared.stores.base import StoreBase 2 | 3 | 4 | class ReliabilityStore(StoreBase): 5 | def __init__(self): 6 | self._payloads = {} 7 | 8 | def add_payload(self, payload_id, payload): 9 | self._payloads[payload_id] = payload 10 | 11 | def get_payload(self, payload_id): 12 | return self._payloads.get(payload_id, None) 13 | 14 | def remove_payload(self, payload_id): 15 | if payload_id in self._payloads: 16 | del self._payloads[payload_id] 17 | -------------------------------------------------------------------------------- /plugin/tandem_lib/agent/tandem/shared/io/proxies/list_parameters.py: -------------------------------------------------------------------------------- 1 | from tandem.shared.io.proxies.base import ProxyBase 2 | 3 | 4 | class ListParametersProxy(ProxyBase): 5 | @staticmethod 6 | def make_lists(items): 7 | new_items = [] 8 | for item in items: 9 | if type(item) is not list: 10 | item = [item] 11 | new_items.append(item) 12 | return new_items 13 | 14 | def pre_generate_io_data(self, params): 15 | args, kwargs = params 16 | return 
(ListParametersProxy.make_lists(args), kwargs) 17 | 18 | def pre_write_io_data(self, params): 19 | args, kwargs = params 20 | return (ListParametersProxy.make_lists(args), kwargs) 21 | -------------------------------------------------------------------------------- /plugin/tandem_lib/agent/tandem/shared/io/proxies/unicode.py: -------------------------------------------------------------------------------- 1 | from tandem.shared.io.proxies.base import ProxyBase 2 | 3 | 4 | class UnicodeProxy(ProxyBase): 5 | def pre_generate_io_data(self, params): 6 | args, kwargs = params 7 | messages, addresses = args 8 | encoded_messages = [ 9 | message.encode("utf-8") if hasattr(message, "encode") else message 10 | for message in messages 11 | ] 12 | return ((encoded_messages, addresses), kwargs) 13 | 14 | def on_retrieve_io_data(self, params): 15 | args, kwargs = params 16 | if args is None: 17 | return params 18 | 19 | raw_data, address = args 20 | data = ( 21 | raw_data.decode("utf-8") if hasattr(raw_data, "decode") 22 | else raw_data 23 | ) 24 | return ((data, address), kwargs) 25 | -------------------------------------------------------------------------------- /plugin/tandem_lib/agent/tandem/agent/io/std_streams.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import logging 3 | from tandem.shared.io.base import InterfaceDataBase, InterfaceBase 4 | 5 | 6 | class STDData(InterfaceDataBase): 7 | pass 8 | 9 | 10 | class STDStreams(InterfaceBase): 11 | data_class = STDData 12 | 13 | def __init__(self, handler_function): 14 | super(STDStreams, self).__init__(handler_function) 15 | 16 | def stop(self): 17 | super(STDStreams, self).stop() 18 | sys.stdout.close() 19 | 20 | def write_io_data(self, *args, **kwargs): 21 | io_data, = args 22 | 23 | sys.stdout.write(io_data.get_data()) 24 | sys.stdout.write("\n") 25 | sys.stdout.flush() 26 | 27 | def _read_data(self): 28 | try: 29 | for line in sys.stdin: 30 | 
self._received_data(line) 31 | except: 32 | logging.exception("Exception when reading from stdin:") 33 | raise 34 | -------------------------------------------------------------------------------- /plugin/tandem_lib/agent/tandem/shared/models/peer.py: -------------------------------------------------------------------------------- 1 | from tandem.shared.models.base import ModelBase 2 | 3 | 4 | class Peer(ModelBase): 5 | def __init__(self, id, public_address, private_address=None): 6 | self._id = id 7 | self._public_address = public_address 8 | self._private_address = private_address 9 | 10 | def __eq__(self, other): 11 | return ( 12 | self._id == other._id and 13 | self._public_address == other._public_address and 14 | self._private_address == other._private_address 15 | ) 16 | 17 | def get_id(self): 18 | return self._id 19 | 20 | def get_addresses(self): 21 | addresses = [self._public_address] 22 | if self._private_address is not None: 23 | addresses.append(self._private_address) 24 | return addresses 25 | 26 | def get_public_address(self): 27 | return self._public_address 28 | 29 | def get_private_address(self): 30 | return self._private_address 31 | -------------------------------------------------------------------------------- /plugin/tandem_lib/agent/tandem/shared/stores/fragment.py: -------------------------------------------------------------------------------- 1 | from tandem.shared.models.fragment import FragmentGroup 2 | from tandem.shared.stores.base import StoreBase 3 | 4 | 5 | class FragmentStore(StoreBase): 6 | def __init__(self): 7 | self._peer_fragment_groups = {} 8 | 9 | def insert_fragment(self, address, message_id, fragment): 10 | if address not in self._peer_fragment_groups: 11 | self._peer_fragment_groups[address] = {} 12 | 13 | fragment_groups = self._peer_fragment_groups[address] 14 | if message_id not in fragment_groups: 15 | new_group = FragmentGroup(fragment.get_total_fragments()) 16 | fragment_groups[message_id] = new_group 17 | 18 | 
fragment_groups[message_id].add_fragment(fragment) 19 | 20 | def get_fragment_group(self, address, message_id): 21 | return self._peer_fragment_groups[address][message_id] 22 | 23 | def remove_fragment_group(self, address, message_id): 24 | del self._peer_fragment_groups[address][message_id] 25 | -------------------------------------------------------------------------------- /plugin/tandem_lib/agent/tandem/agent/configuration.py: -------------------------------------------------------------------------------- 1 | import os 2 | import socket 3 | 4 | # Tandem will try to establish a direct connection with other peers in a 5 | # session. However, this is not always possible. When Tandem is unable to 6 | # establish a peer-to-peer connection, we will relay messages to each peer 7 | # through our servers. If this is undesirable for your use case, you can set 8 | # this flag to "False". 9 | # 10 | # Please note that with relay disabled, you will not be able to collaborate 11 | # with any peers that Tandem cannot reach directly. Tandem does not notify you 12 | # if a peer-to-peer connection cannot be established. 13 | USE_RELAY = True 14 | 15 | # DO NOT edit anything below this unless you know what you're doing! 
16 | 17 | PROJECT_ROOT = os.path.join( 18 | os.path.dirname(os.path.abspath(__file__)), 19 | '..', 20 | '..', 21 | '..', 22 | ) 23 | BASE_DIR = os.path.dirname(PROJECT_ROOT) 24 | CRDT_PATH = os.path.join(BASE_DIR, "..", "crdt") 25 | PLUGIN_PATH = os.path.join(BASE_DIR, "..", "plugins") 26 | RENDEZVOUS_ADDRESS = ( 27 | socket.gethostbyname("rendezvous.typeintandem.com"), 28 | 60000, 29 | ) 30 | -------------------------------------------------------------------------------- /plugin/tandem_lib/agent/tandem/agent/utils/hole_punching.py: -------------------------------------------------------------------------------- 1 | from tandem.agent.protocol.messages.interagent import ( 2 | InteragentProtocolUtils, 3 | Ping, 4 | Syn, 5 | ) 6 | 7 | 8 | class HolePunchingUtils: 9 | PING_INTERVAL = 0.15 10 | SYN_INTERVAL = 0.15 11 | TIMEOUT = 3 12 | 13 | @staticmethod 14 | def generate_send_ping(gateway, addresses, id): 15 | def send_ping(): 16 | HolePunchingUtils._send_message( 17 | gateway, 18 | addresses, 19 | Ping(id=str(id)), 20 | ) 21 | return send_ping 22 | 23 | @staticmethod 24 | def generate_send_syn(gateway, address): 25 | def send_syn(): 26 | HolePunchingUtils._send_message( 27 | gateway, 28 | address, 29 | Syn(), 30 | ) 31 | return send_syn 32 | 33 | @staticmethod 34 | def _send_message(gateway, addresses, message): 35 | io_data = gateway.generate_io_data( 36 | InteragentProtocolUtils.serialize(message), 37 | addresses, 38 | ) 39 | gateway.write_io_data(io_data) 40 | -------------------------------------------------------------------------------- /plugin/tandem_lib/agent/tandem/agent/stores/connection.py: -------------------------------------------------------------------------------- 1 | from tandem.shared.stores.base import StoreBase 2 | from tandem.agent.models.connection_state import ConnectionState 3 | 4 | 5 | class ConnectionStore(StoreBase): 6 | def __init__(self): 7 | self._connections = {} 8 | 9 | def add_connection(self, connection): 10 | 
self._connections[connection.get_id()] = connection 11 | 12 | def remove_connection(self, connection): 13 | del self._connections[connection.get_id()] 14 | 15 | def get_connection_by_id(self, id): 16 | return self._connections.get(id, None) 17 | 18 | def get_connection_by_address(self, address): 19 | for _, connection in self._connections.items(): 20 | if connection.get_active_address() == address: 21 | return connection 22 | return None 23 | 24 | def get_open_connections(self): 25 | return [ 26 | connection for _, connection in self._connections.items() 27 | if ( 28 | connection.get_connection_state() == ConnectionState.OPEN or 29 | connection.get_connection_state() == ConnectionState.RELAY 30 | ) 31 | ] 32 | -------------------------------------------------------------------------------- /plugin/tandem_lib/agent/tandem/shared/models/fragment.py: -------------------------------------------------------------------------------- 1 | from tandem.shared.models.base import ModelBase 2 | 3 | 4 | class Fragment(ModelBase): 5 | def __init__( 6 | self, 7 | total_fragments, 8 | fragment_number, 9 | payload, 10 | ): 11 | self._total_fragments = total_fragments 12 | self._fragment_number = fragment_number 13 | self._payload = payload 14 | 15 | def get_total_fragments(self): 16 | return self._total_fragments 17 | 18 | def get_fragment_number(self): 19 | return self._fragment_number 20 | 21 | def get_payload(self): 22 | return self._payload 23 | 24 | 25 | class FragmentGroup(ModelBase): 26 | def __init__(self, total_fragments): 27 | self._total_fragments = total_fragments 28 | self._buffer = [None for _ in range(total_fragments)] 29 | 30 | def add_fragment(self, fragment): 31 | fragment_number = fragment.get_fragment_number() 32 | if self._buffer[fragment_number] is None: 33 | self._buffer[fragment_number] = fragment.get_payload() 34 | 35 | def is_complete(self): 36 | non_empty_fragments = list(filter(lambda x: x, self._buffer)) 37 | return len(non_empty_fragments) >= 
self._total_fragments 38 | 39 | def defragment(self): 40 | return b"".join(self._buffer) if self.is_complete() else None 41 | -------------------------------------------------------------------------------- /plugin/tandem_lib/agent/tandem/shared/utils/relay.py: -------------------------------------------------------------------------------- 1 | class RelayUtils(object): 2 | HEADER = b"\x54\x01" 3 | RELAY_HEADER = b"\x52\x45" 4 | 5 | @classmethod 6 | def is_relay(cls, raw_data): 7 | return ( 8 | raw_data[0:2] == cls.HEADER and 9 | raw_data[2:4] == cls.RELAY_HEADER 10 | ) 11 | 12 | @staticmethod 13 | def serialize(payload, address): 14 | result = [] 15 | ip, port = address 16 | ip_binary = map( 17 | lambda x: (int(x)).to_bytes(1, byteorder="big"), 18 | ip.split("."), 19 | ) 20 | 21 | result.append(RelayUtils.HEADER) 22 | result.append(RelayUtils.RELAY_HEADER) 23 | result.extend(ip_binary) 24 | result.append(port.to_bytes(2, byteorder="big")) 25 | result.append(payload) 26 | 27 | return b"".join(result) 28 | 29 | @staticmethod 30 | def deserialize(raw_data): 31 | ip = ".".join([ 32 | str(int.from_bytes(raw_data[4:5], byteorder="big")), 33 | str(int.from_bytes(raw_data[5:6], byteorder="big")), 34 | str(int.from_bytes(raw_data[6:7], byteorder="big")), 35 | str(int.from_bytes(raw_data[7:8], byteorder="big")), 36 | ]) 37 | port = int.from_bytes(raw_data[8:10], byteorder="big") 38 | 39 | address = (ip, port) 40 | payload = raw_data[10:] 41 | 42 | return payload, address 43 | -------------------------------------------------------------------------------- /plugin/tandem_lib/agent/main.py: -------------------------------------------------------------------------------- 1 | import signal 2 | import logging 3 | import threading 4 | import argparse 5 | from tandem.agent.executables.agent import TandemAgent 6 | 7 | should_shutdown = threading.Event() 8 | 9 | 10 | def signal_handler(signal, frame): 11 | global should_shutdown 12 | should_shutdown.set() 13 | 14 | 15 | def 
set_up_logging(log_location): 16 | logging.basicConfig( 17 | level=logging.DEBUG, 18 | format="%(asctime)s %(levelname)-8s %(message)s", 19 | datefmt="%Y-%m-%d %H:%M", 20 | filename=log_location, 21 | filemode="w", 22 | ) 23 | 24 | 25 | def main(): 26 | signal.signal(signal.SIGINT, signal_handler) 27 | signal.signal(signal.SIGTERM, signal_handler) 28 | 29 | parser = argparse.ArgumentParser(description="Starts the Tandem agent.") 30 | parser.add_argument( 31 | "--host", 32 | default="", 33 | help="The host address to bind to.", 34 | ) 35 | parser.add_argument( 36 | "--port", 37 | default=0, 38 | type=int, 39 | help="The port to listen on.", 40 | ) 41 | parser.add_argument( 42 | "--log-file", 43 | default="/tmp/tandem-agent.log", 44 | help="The location of the log file.", 45 | ) 46 | args = parser.parse_args() 47 | 48 | set_up_logging(args.log_file) 49 | 50 | # Run the agent until asked to terminate 51 | with TandemAgent(args.host, args.port): 52 | should_shutdown.wait() 53 | 54 | 55 | if __name__ == "__main__": 56 | main() 57 | -------------------------------------------------------------------------------- /plugin/tandem_lib/agent/tandem/shared/protocol/messages/base.py: -------------------------------------------------------------------------------- 1 | import json 2 | import enum 3 | 4 | 5 | class ProtocolMarshalError(ValueError): 6 | pass 7 | 8 | 9 | class ProtocolMessageTypeBase(enum.Enum): 10 | pass 11 | 12 | 13 | class ProtocolMessageBase(object): 14 | def __init__(self, message_type, **kwargs): 15 | for key in self._payload_keys(): 16 | setattr(self, key, kwargs.get(key, None)) 17 | 18 | self.type = message_type 19 | 20 | def _payload_keys(self): 21 | return None 22 | 23 | def to_payload(self): 24 | return {key: getattr(self, key, None) for key in self._payload_keys()} 25 | 26 | @classmethod 27 | def from_payload(cls, **kwargs): 28 | return cls(**kwargs) 29 | 30 | 31 | class ProtocolUtilsBase(object): 32 | @classmethod 33 | def 
_protocol_message_constructors(cls): 34 | return None 35 | 36 | @staticmethod 37 | def serialize(message): 38 | as_dict = { 39 | "type": message.type.value, 40 | "payload": message.to_payload(), 41 | "version": 1, 42 | } 43 | return json.dumps(as_dict) 44 | 45 | @classmethod 46 | def deserialize(cls, as_dict): 47 | data_message_type = as_dict["type"] 48 | data_payload = as_dict["payload"] 49 | items = cls._protocol_message_constructors().items() 50 | 51 | for message_type, target_class in items: 52 | if message_type == data_message_type: 53 | return target_class.from_payload(**data_payload) 54 | 55 | raise ProtocolMarshalError 56 | -------------------------------------------------------------------------------- /plugin/tandem_lib/agent/tandem/shared/io/proxies/fragment.py: -------------------------------------------------------------------------------- 1 | from tandem.shared.io.proxies.base import ProxyBase 2 | from tandem.shared.utils.fragment import FragmentUtils 3 | 4 | 5 | class FragmentProxy(ProxyBase): 6 | def __init__(self, max_message_length=512): 7 | self._max_message_length = max_message_length 8 | 9 | def pre_generate_io_data(self, params): 10 | args, kwargs = params 11 | messages, addresses = args 12 | 13 | if type(messages) is not list: 14 | messages = [messages] 15 | 16 | new_messages = [] 17 | for message in messages: 18 | should_fragment = FragmentUtils.should_fragment( 19 | message, 20 | self._max_message_length, 21 | ) 22 | if should_fragment: 23 | new_messages.extend(FragmentUtils.fragment( 24 | message, 25 | self._max_message_length, 26 | )) 27 | else: 28 | new_messages.append(message) 29 | 30 | new_args = (new_messages, addresses) 31 | return (new_args, kwargs) 32 | 33 | def on_retrieve_io_data(self, params): 34 | args, kwargs = params 35 | if args is None or args is (None, None): 36 | return params 37 | 38 | raw_data, address = args 39 | 40 | if FragmentUtils.is_fragment(raw_data): 41 | defragmented_data = FragmentUtils.defragment(raw_data, 
address) 42 | if defragmented_data: 43 | new_args = (defragmented_data, address) 44 | return (new_args, kwargs) 45 | else: 46 | return (None, None) 47 | else: 48 | return params 49 | -------------------------------------------------------------------------------- /plugin/tandem_lib/agent/tandem/shared/protocol/handlers/base.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import json 3 | from tandem.shared.protocol.messages.base import ProtocolMarshalError 4 | 5 | 6 | class ProtocolHandlerBase(object): 7 | def _protocol_message_utils(self): 8 | return None 9 | 10 | def _protocol_message_handlers(self): 11 | return None 12 | 13 | def _extra_handler_arguments(self, io_data): 14 | return [] 15 | 16 | def handle_raw_data(self, retrieve_io_data): 17 | try: 18 | io_data = retrieve_io_data() 19 | if io_data is None or io_data.is_empty(): 20 | return 21 | 22 | data_as_dict = json.loads(io_data.get_data()) 23 | handled = self.handle_message(data_as_dict, io_data) 24 | 25 | if not handled: 26 | logging.info( 27 | "Protocol message was not handled because " 28 | "no handler was registered.", 29 | ) 30 | 31 | except json.JSONDecodeError: 32 | logging.info( 33 | "Protocol message was ignored because it was not valid JSON.", 34 | ) 35 | 36 | except: 37 | logging.exception("Exception when handling protocol message:") 38 | raise 39 | 40 | def handle_message(self, data_as_dict, io_data): 41 | try: 42 | message = \ 43 | self._protocol_message_utils().deserialize(data_as_dict) 44 | items = self._protocol_message_handlers().items() 45 | 46 | for message_type, handler in items: 47 | if message_type == message.type.value: 48 | handler(message, *self._extra_handler_arguments(io_data)) 49 | return True 50 | 51 | return False 52 | 53 | except ProtocolMarshalError: 54 | return False 55 | -------------------------------------------------------------------------------- /plugin/tandem_lib/agent/tandem/agent/io/proxies/relay.py: 
# /plugin/tandem_lib/agent/tandem/agent/io/proxies/relay.py:
from tandem.shared.io.proxies.base import ProxyBase
from tandem.shared.utils.relay import RelayUtils
from tandem.shared.io.udp_gateway import UDPGateway
from tandem.agent.stores.connection import ConnectionStore


class AgentRelayProxy(ProxyBase):
    """Wraps outgoing datagrams destined for relayed peers so they travel via
    the relay server, and unwraps incoming relayed datagrams back into
    (payload, original sender address)."""

    def __init__(self, relay_server_address):
        # (host, port) of the relay server all wrapped traffic is sent to.
        self._relay_server_address = relay_server_address

    def should_relay(self, address):
        """Return truthy iff traffic to ``address`` must go through the relay.

        Traffic addressed to the relay server itself is never re-wrapped.
        """
        connection_store = ConnectionStore.get_instance()
        connection = connection_store.get_connection_by_address(address)
        return (
            self._relay_server_address != address and
            connection and connection.is_relayed()
        )

    def pre_write_io_data(self, params):
        """For each outgoing io_data, re-address relayed peers' datagrams to
        the relay server with the real destination encoded in the payload."""
        args, kwargs = params
        io_datas, = args

        new_io_datas = []
        for io_data in io_datas:
            new_io_data = io_data
            if self.should_relay(io_data.get_address()):
                new_raw_data = RelayUtils.serialize(
                    io_data.get_data(),
                    io_data.get_address(),
                )
                new_io_data = UDPGateway.data_class(
                    new_raw_data,
                    self._relay_server_address,
                )
            new_io_datas.append(new_io_data)

        new_args = (new_io_datas,)
        return (new_args, kwargs)

    def on_retrieve_io_data(self, params):
        """Unwrap a relayed datagram into (payload, original_sender_address);
        pass non-relay data through untouched."""
        args, kwargs = params
        # BUG FIX: the original guard was ``args is (None, None)`` — an
        # identity comparison against a freshly built tuple literal, which is
        # always False (CPython even emits a SyntaxWarning for it). Use
        # equality so an empty (None, None) argument pair is skipped.
        if args is None or args == (None, None):
            return params

        raw_data, address = args

        if RelayUtils.is_relay(raw_data):
            new_data, new_address = RelayUtils.deserialize(raw_data)
            new_args = (new_data, new_address)
            return (new_args, kwargs)
        else:
            return params


# /plugin/tandem_lib/agent/tandem/shared/utils/reliability.py:
from tandem.shared.stores.reliability import ReliabilityStore


class ReliabilityUtils(object):
    """Frames payloads with 16-bit ack numbers and builds/parses ack packets.

    Wire layout: HEADER (2 bytes) + kind header (2 bytes) + ack number
    (2 bytes, big-endian) + payload.
    """

    HEADER = b"\x54\x01"
    RELIABILITY_HEADER = b"\x52\x4C"
    ACK_HEADER = b"\x41\x43"
    ACK_TIMEOUT = 3  # seconds before an un-acked payload is resent

    MAX_ACK_NUMBER = int(0xFFFF)
    # Class-level counter; pre-incremented, so the first number issued is 0.
    next_ack_number = -1

    @classmethod
    def get_next_ack_number(cls):
        """Return the next ack number, wrapping around at 16 bits."""
        cls.next_ack_number += 1
        cls.next_ack_number %= cls.MAX_ACK_NUMBER + 1

        return cls.next_ack_number

    @classmethod
    def is_ack(cls, raw_data):
        """True iff ``raw_data`` is an ack packet."""
        return (
            raw_data[0:2] == cls.HEADER and
            raw_data[2:4] == cls.ACK_HEADER
        )

    @classmethod
    def is_ackable(cls, raw_data):
        """True iff ``raw_data`` is a reliability-framed payload expecting an ack."""
        return (
            raw_data[0:2] == cls.HEADER and
            raw_data[2:4] == cls.RELIABILITY_HEADER
        )

    @staticmethod
    def should_resend_payload(ack_id):
        # Truthy while the payload is still registered (i.e. not yet acked).
        return ReliabilityStore.get_instance().get_payload(ack_id)

    @staticmethod
    def generate_ack(ack_id):
        """Build the ack packet acknowledging ``ack_id``."""
        result = []
        result.append(ReliabilityUtils.HEADER)
        result.append(ReliabilityUtils.ACK_HEADER)
        result.append((ack_id).to_bytes(2, byteorder="big"))
        return b"".join(result)

    @staticmethod
    def parse_ack(raw_data):
        """Extract the ack number from an ack packet."""
        return int.from_bytes(raw_data[4:6], byteorder="big")

    @staticmethod
    def serialize(payload):
        """Frame ``payload`` for reliable delivery.

        Returns (framed_bytes, ack_number) so the caller can register the
        payload for retransmission until the ack arrives.
        """
        result = []
        ack_number = ReliabilityUtils.get_next_ack_number()
        result.append(ReliabilityUtils.HEADER)
        result.append(ReliabilityUtils.RELIABILITY_HEADER)
        result.append(ack_number.to_bytes(2, byteorder="big"))
        result.append(payload)
        return b"".join(result), ack_number

    @staticmethod
    def deserialize(raw_data):
        """Split a reliability frame into (payload, ack_id)."""
        ack_id = int.from_bytes(raw_data[4:6], byteorder="big")
        payload = raw_data[6:]
        return payload, ack_id


# /plugin/tandem_lib/agent/tandem/shared/io/base.py:
from threading import Thread
from tandem.shared.utils.proxy import ProxyUtils
class InterfaceDataBase(object): 6 | def __init__(self, data): 7 | self._data = data 8 | 9 | def get_data(self): 10 | return self._data 11 | 12 | def is_empty(self): 13 | return self._data is None 14 | 15 | 16 | class InterfaceBase(object): 17 | data_class = InterfaceDataBase 18 | 19 | def __init__(self, incoming_data_handler, proxies=[]): 20 | self._incoming_data_handler = incoming_data_handler 21 | self._reader = Thread(target=self._read_data) 22 | self._proxies = proxies 23 | for proxy in proxies: 24 | proxy.attach_interface(self) 25 | 26 | def start(self): 27 | self._reader.start() 28 | 29 | def stop(self): 30 | self._reader.join() 31 | 32 | def generate_io_data(self, *args, **kwargs): 33 | new_args, new_kwargs = ProxyUtils.run( 34 | self._proxies, 35 | 'pre_generate_io_data', 36 | (args, kwargs), 37 | ) 38 | return self._generate_io_data(*new_args, **new_kwargs) 39 | 40 | def write_io_data(self, *args, **kwargs): 41 | new_args, new_kwargs = ProxyUtils.run( 42 | self._proxies, 43 | 'pre_write_io_data', 44 | (args, kwargs), 45 | ) 46 | return self._write_io_data(*new_args, **new_kwargs) 47 | 48 | def _generate_io_data(self, *args, **kwargs): 49 | return self.data_class(*args, **kwargs) 50 | 51 | def _write_io_data(self, *args, **kwargs): 52 | raise 53 | 54 | def _read_data(self): 55 | raise 56 | 57 | def _received_data(self, *args, **kwargs): 58 | def retrieve_io_data(): 59 | new_args, new_kwargs = ProxyUtils.run( 60 | self._proxies[::-1], 61 | 'on_retrieve_io_data', 62 | (args, kwargs), 63 | ) 64 | if new_args is not None and new_kwargs is not None: 65 | return self.data_class(*new_args, **new_kwargs) 66 | else: 67 | return None 68 | 69 | self._incoming_data_handler(retrieve_io_data) 70 | -------------------------------------------------------------------------------- /plugin/tandem_lib/agent/tandem/shared/protocol/messages/rendezvous.py: -------------------------------------------------------------------------------- 1 | from 
tandem.shared.protocol.messages.base import ( 2 | ProtocolMessageBase, 3 | ProtocolMessageTypeBase, 4 | ProtocolUtilsBase, 5 | ) 6 | from tandem.shared.utils.static_value import static_value as staticvalue 7 | 8 | 9 | class RendezvousProtocolMessageType(ProtocolMessageTypeBase): 10 | ConnectRequest = "rv-connect-request" 11 | SetupParameters = "rv-setup-parameters" 12 | Error = "rv-error" 13 | 14 | 15 | class ConnectRequest(ProtocolMessageBase): 16 | """ 17 | Sent by an agent to request to join an existing session. 18 | """ 19 | def __init__(self, **kwargs): 20 | super(ConnectRequest, self).__init__( 21 | RendezvousProtocolMessageType.ConnectRequest, 22 | **kwargs, 23 | ) 24 | 25 | @staticvalue 26 | def _payload_keys(self): 27 | return ["session_id", "my_id", "private_address"] 28 | 29 | 30 | class SetupParameters(ProtocolMessageBase): 31 | """ 32 | Sent by the server to agents to inform them to connect. 33 | """ 34 | def __init__(self, **kwargs): 35 | super(SetupParameters, self).__init__( 36 | RendezvousProtocolMessageType.SetupParameters, 37 | **kwargs, 38 | ) 39 | 40 | @staticvalue 41 | def _payload_keys(self): 42 | return ["session_id", "peer_id", "initiate", "public", "private"] 43 | 44 | 45 | class Error(ProtocolMessageBase): 46 | """ 47 | Sent by the server to send an error message. 
48 | """ 49 | def __init__(self, **kwargs): 50 | super(Error, self).__init__( 51 | RendezvousProtocolMessageType.Error, 52 | **kwargs, 53 | ) 54 | 55 | @staticvalue 56 | def _payload_keys(self): 57 | return ["message"] 58 | 59 | 60 | class RendezvousProtocolUtils(ProtocolUtilsBase): 61 | @classmethod 62 | @staticvalue 63 | def _protocol_message_constructors(cls): 64 | return { 65 | RendezvousProtocolMessageType.ConnectRequest.value: 66 | ConnectRequest, 67 | RendezvousProtocolMessageType.SetupParameters.value: 68 | SetupParameters, 69 | RendezvousProtocolMessageType.Error.value: Error, 70 | } 71 | -------------------------------------------------------------------------------- /plugin/tandem_lib/agent/tandem/agent/io/document.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | from subprocess import Popen, PIPE 4 | from tandem.agent.configuration import CRDT_PATH 5 | 6 | CRDT_PROCESS = ["node", os.path.join(CRDT_PATH, "build", "bundle.js")] 7 | 8 | 9 | class Document: 10 | def __init__(self): 11 | self._crdt_process = None 12 | self._pending_remote_operations = [] 13 | self._write_request_sent = False 14 | 15 | def start(self): 16 | self._crdt_process = Popen( 17 | CRDT_PROCESS, 18 | stdin=PIPE, 19 | stdout=PIPE, 20 | encoding="utf-8", 21 | ) 22 | 23 | def stop(self): 24 | self._crdt_process.stdin.close() 25 | self._crdt_process.terminate() 26 | self._crdt_process.wait() 27 | 28 | def apply_operations(self, operations_list): 29 | return self._call_remote_function( 30 | "applyOperations", 31 | [operations_list], 32 | ) 33 | 34 | def get_document_text(self): 35 | return self._call_remote_function("getDocumentText") 36 | 37 | def set_text_in_range(self, start, end, text): 38 | return self._call_remote_function( 39 | "setTextInRange", 40 | [start, end, text], 41 | ) 42 | 43 | def get_document_operations(self): 44 | return self._call_remote_function("getDocumentOperations") 45 | 46 | def 
enqueue_remote_operations(self, operations_list): 47 | self._pending_remote_operations.extend(operations_list) 48 | 49 | def apply_queued_operations(self): 50 | text_patches = self.apply_operations(self._pending_remote_operations) 51 | self._pending_remote_operations.clear() 52 | return text_patches 53 | 54 | def write_request_sent(self): 55 | return self._write_request_sent 56 | 57 | def set_write_request_sent(self, value): 58 | self._write_request_sent = value 59 | 60 | def _call_remote_function(self, function_name, parameters=None): 61 | call_message = {"function": function_name} 62 | if parameters is not None: 63 | call_message["parameters"] = parameters 64 | self._crdt_process.stdin.write(json.dumps(call_message)) 65 | self._crdt_process.stdin.write("\n") 66 | self._crdt_process.stdin.flush() 67 | 68 | response = json.loads(self._crdt_process.stdout.readline()) 69 | return response["value"] 70 | -------------------------------------------------------------------------------- /plugin/tandem_lib/agent/tandem/shared/io/proxies/reliability.py: -------------------------------------------------------------------------------- 1 | from tandem.shared.io.udp_gateway import UDPGateway 2 | from tandem.shared.io.proxies.base import ProxyBase 3 | from tandem.shared.utils.reliability import ReliabilityUtils 4 | from tandem.shared.stores.reliability import ReliabilityStore 5 | import logging 6 | 7 | 8 | class ReliabilityProxy(ProxyBase): 9 | def __init__(self, time_scheduler): 10 | self._time_scheduler = time_scheduler 11 | 12 | def _handle_ack_timeout(self, ack_id, io_data): 13 | if ReliabilityUtils.should_resend_payload(ack_id): 14 | logging.info("Timeout on ack {}, resending".format(ack_id)) 15 | self._interface._write_io_data([io_data]) 16 | self._time_scheduler.run_after( 17 | ReliabilityUtils.ACK_TIMEOUT, 18 | self._handle_ack_timeout, 19 | ack_id, 20 | io_data 21 | ) 22 | 23 | def pre_write_io_data(self, params): 24 | args, kwargs = params 25 | io_datas, = args 26 | 
should_ack = kwargs.get('reliability', False) 27 | 28 | if not should_ack: 29 | return params 30 | 31 | new_io_datas = [] 32 | for io_data in io_datas: 33 | new_io_data = io_data 34 | new_raw_data, ack_id = ReliabilityUtils.serialize( 35 | io_data.get_data(), 36 | ) 37 | new_io_data = UDPGateway.data_class( 38 | new_raw_data, 39 | io_data.get_address(), 40 | ) 41 | 42 | ReliabilityStore.get_instance().add_payload(ack_id, new_io_data) 43 | self._time_scheduler.run_after( 44 | ReliabilityUtils.ACK_TIMEOUT, 45 | self._handle_ack_timeout, 46 | ack_id, 47 | new_io_data 48 | ) 49 | 50 | new_io_datas.append(new_io_data) 51 | 52 | new_args = (new_io_datas,) 53 | return (new_args, kwargs) 54 | 55 | def on_retrieve_io_data(self, params): 56 | args, kwargs = params 57 | raw_data, address = args 58 | 59 | if ReliabilityUtils.is_ack(raw_data): 60 | ack_id = ReliabilityUtils.parse_ack(raw_data) 61 | ReliabilityStore.get_instance().remove_payload(ack_id) 62 | return (None, None) 63 | 64 | elif ReliabilityUtils.is_ackable(raw_data): 65 | new_raw_data, ack_id = ReliabilityUtils.deserialize(raw_data) 66 | ack_payload = ReliabilityUtils.generate_ack(ack_id) 67 | self._interface.write_io_data([ 68 | self._interface.data_class(ack_payload, address), 69 | ]) 70 | 71 | new_args = new_raw_data, address 72 | return (new_args, kwargs) 73 | 74 | else: 75 | return params 76 | -------------------------------------------------------------------------------- /plugin/tandem_lib/agent/tandem/shared/io/udp_gateway.py: -------------------------------------------------------------------------------- 1 | import select 2 | import socket 3 | import logging 4 | from tandem.shared.io.base import InterfaceDataBase, InterfaceBase 5 | 6 | 7 | class UDPData(InterfaceDataBase): 8 | def __init__(self, raw_data, address): 9 | super(UDPData, self).__init__(raw_data) 10 | self._address = address 11 | 12 | def get_address(self): 13 | return self._address 14 | 15 | def is_empty(self): 16 | return self._data is None 
and self._address is None 17 | 18 | 19 | class UDPGateway(InterfaceBase): 20 | data_class = UDPData 21 | SELECT_TIMEOUT = 0.5 22 | 23 | def __init__(self, host, port, handler_function, proxies=[]): 24 | super(UDPGateway, self).__init__(handler_function, proxies) 25 | self._host = host 26 | self._port = port 27 | self._socket = socket.socket( 28 | socket.AF_INET, 29 | socket.SOCK_DGRAM, 30 | ) 31 | self._shutdown_requested = False 32 | 33 | def start(self): 34 | self._socket.bind((self._host, self._port)) 35 | super(UDPGateway, self).start() 36 | logging.info("Tandem UDPGateway is listening on {}.".format(( 37 | self._host, 38 | self._port, 39 | ))) 40 | 41 | def stop(self): 42 | self._shutdown_requested = True 43 | # We need to ensure the reader thread has been joined before closing 44 | # the socket to make sure we don't call select() on an invalid file 45 | # descriptor. 46 | super(UDPGateway, self).stop() 47 | self._socket.close() 48 | 49 | def get_port(self): 50 | return self._socket.getsockname()[1] 51 | 52 | def _generate_io_data(self, *args, **kwargs): 53 | messages, addresses = args 54 | 55 | data = [] 56 | for address in addresses: 57 | for message in messages: 58 | data.append(UDPData(message, address)) 59 | 60 | return data 61 | 62 | def _write_io_data(self, *args, **kwargs): 63 | io_datas, = args 64 | 65 | for io_data in io_datas: 66 | message = io_data.get_data() 67 | address = io_data.get_address() 68 | bytes_sent = 0 69 | 70 | while bytes_sent < len(message): 71 | bytes_sent += self._socket.sendto( 72 | message[bytes_sent:], 73 | address 74 | ) 75 | 76 | def _read_data(self): 77 | while not self._shutdown_requested: 78 | ready_to_read, _, _ = select.select( 79 | [self._socket], 80 | [], 81 | [], 82 | UDPGateway.SELECT_TIMEOUT, 83 | ) 84 | if len(ready_to_read) == 0: 85 | # If no descriptors are ready to read, it means the select() 86 | # call timed out. So check if we should exit and, if not, wait 87 | # for data again. 
88 | continue 89 | 90 | raw_data, address = self._socket.recvfrom(4096) 91 | logging.debug("Received data from {}:{}.".format(*address)) 92 | self._received_data(raw_data, address) 93 | 94 | logging.info( 95 | "Tandem has closed the UDP gateway on port {}." 96 | .format(self._port), 97 | ) 98 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Tandem 2 | 3 | Tandem is an add-on for your favorite text editor that enables peer-to-peer 4 | collaborative editing across different editors. 5 | 6 | This repository contains code for the Vim plugin. For more details on Tandem, 7 | visit [our website](http://typeintandem.com), or our [mono-repository 8 | containing all other source code.](https://github.com/typeintandem/tandem) 9 | 10 | *Note: Vim is not officially supported due to its lack of thread-safety. 11 | Instead we recommend Tandem with Neovim, one of our officially supported 12 | plugins. 13 | We added functionality to this editor since it was a minimal amount of work to 14 | port the logic - please use at your own risk.* 15 | 16 | ## Installation 17 | To install, you must have a copy of vim compiled with python installed. 18 | You must also have `python3` and `node.js` installed. 19 | 20 | Vim users have the option of installing in one of the following ways: 21 | - **[Recommended]** Using your favourite plugin manager (e.g. Vundle, vim-plug, 22 | etc.) Tandem should be compatible with most popular plugin managers 23 | - Installing Tandem directly. You’ll need download this repository. In your 24 | `~/.vimrc`, make sure you source the `tandem_vim.vim` file: 25 | `source /path/to/tandem/plugin/tandem_vim.vim` 26 | 27 | ## Usage 28 | Tandem users can choose either start a collaborative session or join an 29 | existing one. Starting a collaborative session will share the contents of your 30 | current buffer. 
Joining an existing session will open it’s contents in a new 31 | buffer. 32 | 33 | Please use one of the following commands: 34 | - `:Tandem` - creates a new tandem session and prints the session ID 35 | - `:Tandem ` - joins an existing tandem session with the specified 36 | session ID 37 | - `:TandemStop` - leaves the current session 38 | - `:TandemSession` - prints the current session ID 39 | 40 | It is recommended to leave the session before exiting vim, but that process 41 | should be automated. 42 | 43 | ## Terms of Service 44 | By using Tandem, you agree that any modified versions of Tandem will not use 45 | the rendezvous server hosted by the owners. You must host and use your own copy 46 | of the rendezvous server. We want to provide a good user experience for Tandem, 47 | and it would be difficult to do that with modified clients as well. 48 | 49 | You can launch the rendezvous server by running `python3 ./rendezvous/main.py`. 50 | Change the address of the rendezvous server used by the agent in the 51 | configuration file to point to your server's host. This file is located at: 52 | `plugin/tandem_lib/agent/tandem/agent/configuration.py` 53 | 54 | ## License 55 | Copyright (c) 2018 Team Lightly 56 | 57 | See [LICENSE.txt](LICENSE.txt) 58 | 59 | Licensed under the Apache License, Version 2.0 (the "License"); 60 | you may not use this file except in compliance with the License. 61 | You may obtain a copy of the License at: 62 | 63 | http://www.apache.org/licenses/LICENSE-2.0 64 | ## Authors 65 | Team Lightly 66 | [Geoffrey Yu](https://github.com/geoffxy), [Jamiboy 67 | Mohammad](https://github.com/jamiboym) and [Sameer 68 | Chitley](https://github.com/rageandqq) 69 | 70 | We are a team of senior Software Engineering students at the University of 71 | Waterloo. 72 | Tandem was created as our [Engineering Capstone Design 73 | Project](https://uwaterloo.ca/capstone-design). 
74 | -------------------------------------------------------------------------------- /plugin/tandem_vim.vim: -------------------------------------------------------------------------------- 1 | if !has('python') 2 | " :echom is persistent messaging. See 3 | " http://learnvimscriptthehardway.stevelosh.com/chapters/01.html 4 | :echom 'ERROR: Please use a version of Vim with Python support' 5 | finish 6 | endif 7 | 8 | if !executable('python3') 9 | :echom 'ERROR: Global python3 install required.' 10 | finish 11 | endif 12 | 13 | " Bind the Tandem functions to globally available commands. 14 | " ================= 15 | " Start agent with `:Tandem` 16 | " Start agent and connect to network with `:Tandem ` 17 | com! -nargs=* Tandem py tandem_plugin.start() 18 | " ================ 19 | " Stop agent (and disconnect from network) with `:TandemStop` 20 | com! TandemStop py tandem_plugin.stop(False) 21 | 22 | " Show Session ID for active session 23 | com! TandemSession py tandem_plugin.show_session_id() 24 | 25 | " Get the absolute path to the folder this script resides in, respecting 26 | " symlinks 27 | let s:path = fnamemodify(resolve(expand(':p')), ':h') 28 | 29 | python << EOF 30 | 31 | import os 32 | import sys 33 | import vim 34 | 35 | # Add the script path to the python path 36 | local_path = vim.eval("s:path") 37 | if local_path not in sys.path: 38 | sys.path.insert(0, local_path) 39 | 40 | import tandem_lib.tandem_plugin as plugin 41 | import tandem_lib.agent.tandem.agent.protocol.messages.editor as m 42 | 43 | class TandemVimPlugin: 44 | def __init__(self): 45 | self._tandem = plugin.TandemPlugin( 46 | vim=vim, 47 | on_start=self._set_up_autocommands, 48 | message_handler=self._handle_message, 49 | ) 50 | self._message = None 51 | 52 | def _handle_message(self, message): 53 | self._message = message 54 | if isinstance(message, m.ApplyText): 55 | vim.command(":doautocmd User TandemApplyText") 56 | elif isinstance(message, m.WriteRequest): 57 | 
vim.command(":doautocmd User TandemWriteRequest") 58 | elif isinstance(message, m.SessionInfo): 59 | vim.command('echom "Session ID: {}"'.format(message.session_id)) 60 | self._session_id = message.session_id 61 | 62 | def _handle_apply_text(self): 63 | self._tandem.handle_apply_text(self._message) 64 | self._message = None 65 | 66 | def _handle_write_request(self): 67 | self._tandem.handle_write_request(self._message) 68 | self._message = None 69 | 70 | def _check_buffer(self): 71 | self._tandem.check_buffer() 72 | 73 | def _set_up_autocommands(self): 74 | vim.command(':autocmd!') 75 | vim.command('autocmd TextChanged py tandem_plugin._check_buffer()') 76 | vim.command('autocmd TextChangedI py tandem_plugin._check_buffer()') 77 | vim.command('autocmd VimLeave * py tandem_plugin.stop()') 78 | vim.command("autocmd User TandemApplyText py tandem_plugin._handle_apply_text()") 79 | vim.command("autocmd User TandemWriteRequest py tandem_plugin._handle_write_request()") 80 | 81 | def start(self, session_id=None): 82 | self._tandem.start(session_id) 83 | self._session_id = session_id 84 | 85 | def stop(self, invoked_from_autocmd=True): 86 | self._tandem.stop(invoked_from_autocmd) 87 | self._session_id = None 88 | 89 | def show_session_id(self): 90 | if not plugin.is_active: 91 | vim.command(':echom "No instance running."') 92 | return 93 | vim.command('echom "Session ID: {}"'.format(self._session_id)) 94 | 95 | 96 | tandem_plugin = TandemVimPlugin() 97 | 98 | EOF 99 | -------------------------------------------------------------------------------- /plugin/tandem_lib/agent/tandem/shared/utils/fragment.py: -------------------------------------------------------------------------------- 1 | from tandem.shared.stores.fragment import FragmentStore 2 | from tandem.shared.models.fragment import Fragment 3 | 4 | 5 | class FragmentUtils(object): 6 | HEADER = b"\x54\x01" 7 | FRAGMENT_HEADER = b"\x46\x52" 8 | FRAGMENT_HEADER_LENGTH = len(HEADER) + len(FRAGMENT_HEADER) + 6 9 | 10 | 
MAX_SEQUENCE_NUMBER = int(0xFFFF) 11 | next_sequence_number = -1 12 | 13 | @classmethod 14 | def is_fragment(cls, message): 15 | return ( 16 | message[0:2] == cls.HEADER and 17 | message[2:4] == cls.FRAGMENT_HEADER 18 | ) 19 | 20 | @staticmethod 21 | def should_fragment(message, max_message_length): 22 | return len(message) > max_message_length 23 | 24 | @classmethod 25 | def get_next_sequence_number(cls): 26 | cls.next_sequence_number += 1 27 | cls.next_sequence_number %= cls.MAX_SEQUENCE_NUMBER + 1 28 | 29 | return cls.next_sequence_number 30 | 31 | @staticmethod 32 | def serialize(fragment, sequence_number): 33 | result = [] 34 | result.append(FragmentUtils.HEADER) 35 | result.append(FragmentUtils.FRAGMENT_HEADER) 36 | result.append( 37 | fragment.get_total_fragments().to_bytes(2, byteorder="big") 38 | ) 39 | result.append( 40 | sequence_number.to_bytes(2, byteorder="big") 41 | ) 42 | result.append( 43 | fragment.get_fragment_number().to_bytes(2, byteorder="big") 44 | ) 45 | result.append(fragment.get_payload()) 46 | return b"".join(result) 47 | 48 | @staticmethod 49 | def deserialize(message): 50 | total_fragments = int.from_bytes(message[4:6], byteorder="big") 51 | sequence_number = int.from_bytes(message[6:8], byteorder="big") 52 | fragment_number = int.from_bytes(message[8:10], byteorder="big") 53 | payload = message[10:] 54 | 55 | new_fragment = Fragment(total_fragments, fragment_number, payload) 56 | return new_fragment, sequence_number 57 | 58 | @classmethod 59 | def fragment(cls, payload, max_message_length): 60 | max_payload_length = max_message_length - cls.FRAGMENT_HEADER_LENGTH 61 | 62 | payloads = [ 63 | payload[i:i + max_payload_length] 64 | for i in range(0, len(payload), max_payload_length) 65 | ] 66 | 67 | fragments = [ 68 | Fragment(len(payloads), index, payload) 69 | for index, payload in enumerate(payloads) 70 | ] 71 | 72 | sequence_number = FragmentUtils.get_next_sequence_number() 73 | messages = [ 74 | FragmentUtils.serialize(fragment, 
sequence_number) 75 | for fragment in fragments 76 | ] 77 | 78 | return messages 79 | 80 | @staticmethod 81 | def defragment(raw_data, sender_address): 82 | fragment_store = FragmentStore.get_instance() 83 | fragment, sequence_number = FragmentUtils.deserialize(raw_data) 84 | fragment_store.insert_fragment( 85 | sender_address, 86 | sequence_number, 87 | fragment 88 | ) 89 | fragment_group = fragment_store.get_fragment_group( 90 | sender_address, 91 | sequence_number, 92 | ) 93 | 94 | defragmented_data = fragment_group.defragment() 95 | if fragment_group.is_complete(): 96 | fragment_store.remove_fragment_group( 97 | sender_address, 98 | sequence_number, 99 | ) 100 | return defragmented_data 101 | -------------------------------------------------------------------------------- /plugin/tandem_lib/agent/tandem/agent/models/connection.py: -------------------------------------------------------------------------------- 1 | from tandem.shared.models.base import ModelBase 2 | from tandem.agent.models.connection_state import ConnectionState 3 | import logging 4 | 5 | 6 | class Connection(ModelBase): 7 | def __init__(self, peer): 8 | self._peer = peer 9 | 10 | def get_id(self): 11 | return self._peer.get_id() 12 | 13 | def get_active_address(self): 14 | raise NotImplementedError 15 | 16 | def get_connection_state(self): 17 | raise NotImplementedError 18 | 19 | def set_connection_state(self, state): 20 | raise NotImplementedError 21 | 22 | def is_relayed(self): 23 | return self.get_connection_state() == ConnectionState.RELAY 24 | 25 | def get_peer(self): 26 | return self._peer 27 | 28 | 29 | class DirectConnection(Connection): 30 | """ 31 | A connection to a peer established without using hole punching. 
32 | """ 33 | def __init__(self, peer): 34 | super(DirectConnection, self).__init__(peer) 35 | 36 | def get_active_address(self): 37 | return self.get_peer().get_public_address() 38 | 39 | def get_connection_state(self): 40 | return ConnectionState.OPEN 41 | 42 | def set_connection_state(self, state): 43 | pass 44 | 45 | 46 | class HolePunchedConnection(Connection): 47 | """ 48 | A connection to a peer that was established with hole punching. 49 | """ 50 | PROMOTE_AFTER = 3 51 | 52 | def __init__(self, peer, initiated_connection): 53 | super(HolePunchedConnection, self).__init__(peer) 54 | self._active_address = None 55 | self._interval_handle = None 56 | self._connection_state = ConnectionState.PING 57 | # If true, this agent initiated the connection to this peer 58 | self._initiated_connection = initiated_connection 59 | 60 | self._address_ping_counts = {} 61 | self._address_ping_counts[peer.get_public_address()] = 0 62 | if peer.get_private_address() is not None: 63 | self._address_ping_counts[peer.get_private_address()] = 0 64 | 65 | def get_active_address(self): 66 | if self._active_address is None: 67 | self._active_address = self._compute_active_address() 68 | return self._active_address 69 | 70 | def get_connection_state(self): 71 | return self._connection_state 72 | 73 | def set_connection_state(self, state): 74 | if self._connection_state == state: 75 | return 76 | self._connection_state = state 77 | if self._interval_handle is not None: 78 | self._interval_handle.cancel() 79 | self._interval_handle = None 80 | 81 | def set_interval_handle(self, interval_handle): 82 | self._interval_handle = interval_handle 83 | 84 | def bump_ping_count(self, address): 85 | if address in self._address_ping_counts: 86 | self._address_ping_counts[address] += 1 87 | 88 | def initiated_connection(self): 89 | return self._initiated_connection 90 | 91 | def _compute_active_address(self): 92 | if self.is_relayed(): 93 | return self.get_peer().get_public_address() 94 | 95 | 
private_address = self.get_peer().get_private_address() 96 | private_address_count = ( 97 | self._address_ping_counts[private_address] 98 | if private_address is not None else 0 99 | ) 100 | 101 | # If the private address is routable, always choose it 102 | if private_address_count > 0: 103 | return ( 104 | private_address 105 | if private_address_count >= HolePunchedConnection.PROMOTE_AFTER 106 | else None 107 | ) 108 | 109 | public_address = self.get_peer().get_public_address() 110 | public_address_count = self._address_ping_counts[public_address] 111 | return ( 112 | public_address 113 | if public_address_count >= HolePunchedConnection.PROMOTE_AFTER 114 | else None 115 | ) 116 | -------------------------------------------------------------------------------- /plugin/tandem_lib/agent/tandem/agent/protocol/messages/interagent.py: -------------------------------------------------------------------------------- 1 | from tandem.shared.protocol.messages.base import ( 2 | ProtocolMessageTypeBase, 3 | ProtocolMessageBase, 4 | ProtocolUtilsBase, 5 | ) 6 | from tandem.shared.utils.static_value import static_value as staticvalue 7 | 8 | 9 | class InteragentProtocolMessageType(ProtocolMessageTypeBase): 10 | # Connection setup messages 11 | Ping = "ia-ping" 12 | PingBack = "ia-ping-back" 13 | Syn = "ia-syn" 14 | Hello = "ia-hello" 15 | 16 | # Regular interagent messages 17 | NewOperations = "ia-new-operations" 18 | Bye = "ia-bye" 19 | 20 | 21 | class Ping(ProtocolMessageBase): 22 | """ 23 | Sent by the agent to a peer to maintain or open a connection. 24 | """ 25 | def __init__(self, **kwargs): 26 | super(Ping, self).__init__( 27 | InteragentProtocolMessageType.Ping, 28 | **kwargs, 29 | ) 30 | 31 | @staticvalue 32 | def _payload_keys(self): 33 | return ["id"] 34 | 35 | 36 | class PingBack(ProtocolMessageBase): 37 | """ 38 | Sent in response to a Ping message to acknowledge receipt. 
39 | """ 40 | def __init__(self, **kwargs): 41 | super(PingBack, self).__init__( 42 | InteragentProtocolMessageType.PingBack, 43 | **kwargs, 44 | ) 45 | 46 | @staticvalue 47 | def _payload_keys(self): 48 | return ["id"] 49 | 50 | 51 | class Syn(ProtocolMessageBase): 52 | """ 53 | Sent by the connection initiator to indicate that it has 54 | completed its connection set up and wishes to begin 55 | communicating via regular protocol messages. 56 | 57 | The initiator should continue sending this message until 58 | it receives a regular protocol message from the non-initiator. 59 | """ 60 | def __init__(self, **kwargs): 61 | super(Syn, self).__init__( 62 | InteragentProtocolMessageType.Syn, 63 | **kwargs, 64 | ) 65 | 66 | @staticvalue 67 | def _payload_keys(self): 68 | return [] 69 | 70 | 71 | class Hello(ProtocolMessageBase): 72 | """ 73 | Sent directly from one agent to another to introduce itself. 74 | 75 | This message is used to directly establish a connection. It 76 | is sent after receiving a ConnectTo message from the plugin. 77 | 78 | The should_reply flag is set if the agent wants the remote 79 | peer to respond with a Hello message containing its ID. 80 | """ 81 | def __init__(self, **kwargs): 82 | super(Hello, self).__init__( 83 | InteragentProtocolMessageType.Hello, 84 | **kwargs, 85 | ) 86 | 87 | @staticvalue 88 | def _payload_keys(self): 89 | return ["id", "should_reply"] 90 | 91 | 92 | class Bye(ProtocolMessageBase): 93 | def __init__(self, **kwargs): 94 | super(Bye, self).__init__( 95 | InteragentProtocolMessageType.Bye, 96 | **kwargs, 97 | ) 98 | 99 | @staticvalue 100 | def _payload_keys(self): 101 | return [] 102 | 103 | 104 | class NewOperations(ProtocolMessageBase): 105 | """ 106 | Sent to other agents to notify them of new CRDT operations to apply. 
107 | """ 108 | def __init__(self, **kwargs): 109 | super(NewOperations, self).__init__( 110 | InteragentProtocolMessageType.NewOperations, 111 | **kwargs, 112 | ) 113 | 114 | @staticvalue 115 | def _payload_keys(self): 116 | return ['operations_list'] 117 | 118 | 119 | class InteragentProtocolUtils(ProtocolUtilsBase): 120 | @classmethod 121 | @staticvalue 122 | def _protocol_message_constructors(cls): 123 | return { 124 | InteragentProtocolMessageType.Ping.value: Ping, 125 | InteragentProtocolMessageType.PingBack.value: PingBack, 126 | InteragentProtocolMessageType.Syn.value: Syn, 127 | InteragentProtocolMessageType.Hello.value: Hello, 128 | InteragentProtocolMessageType.Bye.value: Bye, 129 | InteragentProtocolMessageType.NewOperations.value: NewOperations, 130 | } 131 | -------------------------------------------------------------------------------- /plugin/tandem_lib/agent/tandem/agent/executables/agent.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import uuid 3 | from tandem.agent.io.document import Document 4 | from tandem.agent.io.std_streams import STDStreams 5 | from tandem.shared.io.udp_gateway import UDPGateway 6 | from tandem.agent.protocol.handlers.editor import EditorProtocolHandler 7 | from tandem.agent.protocol.handlers.interagent import InteragentProtocolHandler 8 | from tandem.agent.protocol.handlers.rendezvous import RendezvousProtocolHandler 9 | from tandem.shared.protocol.handlers.multi import MultiProtocolHandler 10 | from tandem.shared.utils.time_scheduler import TimeScheduler 11 | from tandem.shared.io.proxies.fragment import FragmentProxy 12 | from tandem.shared.io.proxies.list_parameters import ListParametersProxy 13 | from tandem.shared.io.proxies.unicode import UnicodeProxy 14 | from tandem.shared.io.proxies.reliability import ReliabilityProxy 15 | from tandem.agent.io.proxies.relay import AgentRelayProxy 16 | from concurrent.futures import ThreadPoolExecutor 17 | from 
class TandemAgent:
    """
    Top-level coordinator for a single Tandem agent process.

    Owns the I/O endpoints (stdin/stdout streams to the editor plugin and
    one UDP gateway shared by the interagent and rendezvous protocols) plus
    the protocol handlers, and funnels all message handling through a
    single-threaded executor so handlers never run concurrently.
    """
    def __init__(self, host, port):
        # Fresh identity for this agent; sent to peers and the rendezvous
        # server in protocol messages.
        self._id = uuid.uuid4()
        self._requested_host = host
        # This is the port the user specified on the command line (it can be 0)
        self._requested_port = port
        # A single worker thread serializes all protocol handling.
        self._main_executor = ThreadPoolExecutor(max_workers=1)
        self._time_scheduler = TimeScheduler(self._main_executor)
        self._document = Document()
        # Messages from the editor plugin arrive on stdin.
        self._std_streams = STDStreams(self._on_std_input)
        # The proxy chain wraps datagrams in the listed order (parameter
        # normalization, unicode, fragmentation, relay fallback, reliability).
        self._interagent_gateway = UDPGateway(
            self._requested_host,
            self._requested_port,
            self._gateway_message_handler,
            [
                ListParametersProxy(),
                UnicodeProxy(),
                FragmentProxy(),
                AgentRelayProxy(RENDEZVOUS_ADDRESS),
                ReliabilityProxy(self._time_scheduler),
            ],
        )
        self._editor_protocol = EditorProtocolHandler(
            self._id,
            self._std_streams,
            self._interagent_gateway,
            self._document,
        )
        self._interagent_protocol = InteragentProtocolHandler(
            self._id,
            self._std_streams,
            self._interagent_gateway,
            self._document,
            self._time_scheduler,
        )
        self._rendezvous_protocol = RendezvousProtocolHandler(
            self._id,
            self._interagent_gateway,
            self._time_scheduler,
            self._document,
        )
        # Demultiplexes raw gateway data across the two UDP protocols.
        self._gateway_handlers = MultiProtocolHandler(
            self._interagent_protocol,
            self._rendezvous_protocol,
        )

    def __enter__(self):
        # Context-manager support: `with TandemAgent(host, port) as agent:`
        self.start()
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.stop()

    def start(self):
        """Start every component. Call once before using the agent."""
        self._time_scheduler.start()
        self._document.start()
        self._std_streams.start()
        self._interagent_gateway.start()
        logging.info("Tandem Agent has started.")

    def stop(self):
        """Stop all components and block until shutdown completes."""
        def atomic_shutdown():
            # Runs on the main executor so it cannot interleave with any
            # in-flight message handler.
            self._interagent_protocol.stop()
            self._interagent_gateway.stop()
            self._std_streams.stop()
            self._document.stop()
        # NOTE(review): the scheduler is stopped on the calling thread
        # before the shutdown task is queued, so no further timed tasks can
        # reach the executor - confirm this ordering is intentional.
        self._time_scheduler.stop()
        self._main_executor.submit(atomic_shutdown)
        # shutdown() waits for atomic_shutdown to drain before returning.
        self._main_executor.shutdown()
        logging.info("Tandem Agent has shut down.")

    def _on_std_input(self, retrieve_data):
        # Called by _std_streams after receiving a new message from the plugin
        self._main_executor.submit(
            self._editor_protocol.handle_message,
            retrieve_data,
        )

    def _gateway_message_handler(self, retrieve_data):
        # Do not call directly - called by _interagent_gateway
        self._main_executor.submit(
            self._gateway_handlers.handle_raw_data,
            retrieve_data,
        )
    def _handle_setup_parameters(self, message, sender_address):
        """
        Handle a SetupParameters message from the rendezvous server.

        Registers the described peer, starts pinging both of its candidate
        addresses (UDP hole punching), and schedules a timeout that falls
        back to relaying through the rendezvous server (when enabled).
        """
        public_address = (message.public[0], message.public[1])
        private_address = (message.private[0], message.private[1])
        logging.debug(
            "Received SetupParameters - Connect to {} at public {}:{} "
            "and private {}:{}"
            .format(message.peer_id, *public_address, *private_address),
        )
        peer = Peer(
            id=uuid.UUID(message.peer_id),
            public_address=public_address,
            private_address=private_address,
        )
        new_connection = HolePunchedConnection(
            peer=peer,
            initiated_connection=message.initiate,
        )
        # Ping both candidate addresses on an interval; the handle is kept
        # on the connection so the pings can be cancelled later.
        new_connection.set_interval_handle(self._time_scheduler.run_every(
            HolePunchingUtils.PING_INTERVAL,
            HolePunchingUtils.generate_send_ping(
                self._gateway,
                peer.get_addresses(),
                self._id,
            ),
        ))

        def handle_hole_punching_timeout(connection):
            # Fires once after HolePunchingUtils.TIMEOUT. If the connection
            # opened in the meantime there is nothing to do.
            if connection.get_connection_state() == ConnectionState.OPEN:
                return

            if not USE_RELAY:
                logging.info(
                    "Connection {} is unreachable. Not switching to RELAY "
                    "because it was disabled."
                    .format(connection.get_peer().get_public_address()),
                )
                connection.set_connection_state(ConnectionState.UNREACHABLE)
                return

            logging.info("Switching connection {} to RELAY".format(
                connection.get_peer().get_public_address()
            ))

            # NOTE(review): the ping interval handle set above does not
            # appear to be cancelled here - confirm set_connection_state
            # (or the connection model) takes care of it.
            connection.set_connection_state(ConnectionState.RELAY)

            # Push the full operation log so the relayed peer converges.
            operations = self._document.get_document_operations()
            payload = InteragentProtocolUtils.serialize(NewOperations(
                operations_list=json.dumps(operations)
            ))
            io_data = self._gateway.generate_io_data(
                payload,
                connection.get_peer().get_public_address(),
            )
            self._gateway.write_io_data(
                io_data,
                reliability=True,
            )

        self._time_scheduler.run_after(
            HolePunchingUtils.TIMEOUT,
            handle_hole_punching_timeout,
            new_connection
        )
        ConnectionStore.get_instance().add_connection(new_connection)
13 | """ 14 | def __init__(self, executor, resolution_seconds=0.1): 15 | self._executor = executor 16 | self._resolution_seconds = resolution_seconds 17 | 18 | self._shutting_down = False 19 | self._shut_down_event = Event() 20 | self._runner = Thread(target=self._run_scheduler) 21 | self._scheduler = sched.scheduler(time.time, time.sleep) 22 | 23 | def run_after(self, delay_seconds, function, *args, **kwargs): 24 | """ 25 | Schedules the specified function on the executor after delay_seconds. 26 | 27 | This returns a handle that has a cancel() method to cancel the 28 | request to run this function. 29 | """ 30 | handle = _Handle(self) 31 | handle.set_event_handle(self._schedule_after( 32 | delay_seconds, 33 | function, 34 | handle, 35 | lambda: None, 36 | *args, 37 | **kwargs, 38 | )) 39 | return handle 40 | 41 | def run_every(self, interval_seconds, function, *args, **kwargs): 42 | """ 43 | Schedules the specified function at least every interval_seconds. 44 | 45 | This returns a handle that has a cancel() method to cancel the 46 | request to run this function. 47 | 48 | This only guarantees that at least interval_seconds elapses 49 | between each invocation of the function. It does not guarantee 50 | that the function runs exactly every interval_seconds. 51 | """ 52 | handle = _Handle(self) 53 | 54 | def reschedule(): 55 | handle.set_event_handle(self._schedule_after( 56 | interval_seconds, 57 | function, 58 | handle, 59 | reschedule, 60 | *args, 61 | **kwargs, 62 | )) 63 | 64 | reschedule() 65 | return handle 66 | 67 | def start(self): 68 | """ 69 | Starts this scheduler. 70 | 71 | Until this is called, no tasks will actually be scheduled. 72 | However the scheduler will still accept schedule requests. 73 | """ 74 | self._runner.start() 75 | 76 | def stop(self): 77 | """ 78 | Stops this scheduler. 79 | 80 | All remaining pending tasks after this returns will no 81 | longer be scheduled. This does not wait for all pending 82 | tasks to be scheduled. 
83 | """ 84 | self._shutting_down = True 85 | self._shut_down_event.set() 86 | self._runner.join() 87 | 88 | def _cancel(self, event_handle): 89 | try: 90 | self._scheduler.cancel(event_handle) 91 | except ValueError: 92 | pass 93 | 94 | def _schedule_after( 95 | self, 96 | delay_seconds, 97 | function, 98 | handle, 99 | epilogue, 100 | *args, 101 | **kwargs, 102 | ): 103 | return self._scheduler.enter( 104 | delay_seconds, 105 | 0, 106 | self._executor.submit, 107 | (self._run_if_not_cancelled, function, handle, epilogue, *args), 108 | kwargs, 109 | ) 110 | 111 | def _run_if_not_cancelled( 112 | self, 113 | function, 114 | handle, 115 | epilogue, 116 | *args, 117 | **kwargs, 118 | ): 119 | if handle.is_cancelled(): 120 | return 121 | try: 122 | function(*args, **kwargs) 123 | finally: 124 | epilogue() 125 | 126 | def _run_scheduler(self): 127 | while not self._shutting_down: 128 | self._scheduler.run(blocking=False) 129 | self._shut_down_event.wait(timeout=self._resolution_seconds) 130 | 131 | 132 | class _Handle: 133 | def __init__(self, scheduler): 134 | self._scheduler = scheduler 135 | self._event_handle = None 136 | self._cancelled = False 137 | 138 | def cancel(self): 139 | self._cancelled = True 140 | self._scheduler._cancel(self._event_handle) 141 | 142 | def is_cancelled(self): 143 | return self._cancelled 144 | 145 | def set_event_handle(self, new_handle): 146 | self._event_handle = new_handle 147 | -------------------------------------------------------------------------------- /plugin/tandem_lib/agent/tandem/agent/protocol/handlers/editor.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import logging 4 | import socket 5 | import uuid 6 | import tandem.agent.protocol.messages.editor as em 7 | from tandem.agent.protocol.messages.interagent import ( 8 | InteragentProtocolUtils, 9 | NewOperations, 10 | Hello 11 | ) 12 | from tandem.agent.stores.connection import ConnectionStore 13 | 
from tandem.shared.protocol.messages.rendezvous import ( 14 | RendezvousProtocolUtils, 15 | ConnectRequest, 16 | ) 17 | from tandem.agent.configuration import RENDEZVOUS_ADDRESS 18 | 19 | 20 | class EditorProtocolHandler: 21 | def __init__(self, id, std_streams, gateway, document): 22 | self._id = id 23 | self._std_streams = std_streams 24 | self._gateway = gateway 25 | self._document = document 26 | 27 | def handle_message(self, retrieve_io_data): 28 | io_data = retrieve_io_data() 29 | data = io_data.get_data() 30 | 31 | try: 32 | message = em.deserialize(data) 33 | if type(message) is em.ConnectTo: 34 | self._handle_connect_to(message) 35 | elif type(message) is em.WriteRequestAck: 36 | self._handle_write_request_ack(message) 37 | elif type(message) is em.NewPatches: 38 | self._handle_new_patches(message) 39 | elif type(message) is em.CheckDocumentSync: 40 | self._handle_check_document_sync(message) 41 | elif type(message) is em.HostSession: 42 | self._handle_host_session(message) 43 | elif type(message) is em.JoinSession: 44 | self._handle_join_session(message) 45 | except em.EditorProtocolMarshalError: 46 | logging.info("Ignoring invalid editor protocol message.") 47 | except: 48 | logging.exception( 49 | "Exception when handling editor protocol message:") 50 | raise 51 | 52 | def _handle_connect_to(self, message): 53 | hostname = socket.gethostbyname(message.host) 54 | logging.info( 55 | "Tandem Agent is attempting to establish a direct" 56 | " connection to {}:{}.".format(hostname, message.port), 57 | ) 58 | 59 | address = (hostname, message.port) 60 | payload = InteragentProtocolUtils.serialize(Hello( 61 | id=str(self._id), 62 | should_reply=True, 63 | )) 64 | io_data = self._gateway.generate_io_data(payload, address) 65 | self._gateway.write_io_data(io_data) 66 | 67 | def _handle_write_request_ack(self, message): 68 | logging.debug("Received ACK for seq: {}".format(message.seq)) 69 | text_patches = self._document.apply_queued_operations() 70 | 
self._document.set_write_request_sent(False) 71 | # Even if no text patches need to be applied, we need to reply to 72 | # the plugin to allow it to accept changes from the user again 73 | text_patches_message = em.ApplyPatches(text_patches) 74 | io_data = self._std_streams.generate_io_data( 75 | em.serialize(text_patches_message), 76 | ) 77 | self._std_streams.write_io_data(io_data) 78 | logging.debug( 79 | "Sent apply patches message for seq: {}".format(message.seq), 80 | ) 81 | 82 | def _handle_new_patches(self, message): 83 | nested_operations = [ 84 | self._document.set_text_in_range( 85 | patch["start"], 86 | patch["end"], 87 | patch["text"], 88 | ) 89 | for patch in message.patch_list 90 | ] 91 | operations = [] 92 | for operations_list in nested_operations: 93 | operations.extend(operations_list) 94 | 95 | connections = ConnectionStore.get_instance().get_open_connections() 96 | if len(connections) == 0: 97 | return 98 | 99 | addresses = [ 100 | connection.get_active_address() for connection in connections 101 | ] 102 | payload = InteragentProtocolUtils.serialize(NewOperations( 103 | operations_list=json.dumps(operations) 104 | )) 105 | io_data = self._gateway.generate_io_data(payload, addresses) 106 | self._gateway.write_io_data( 107 | io_data, 108 | reliability=True, 109 | ) 110 | 111 | def _handle_check_document_sync(self, message): 112 | document_text_content = self._document.get_document_text() 113 | 114 | # TODO: ignore all other messages until we receive an ack 115 | contents = os.linesep.join(message.contents) + os.linesep 116 | 117 | if (contents != document_text_content): 118 | document_lines = document_text_content.split(os.linesep) 119 | apply_text = em.serialize(em.ApplyText(document_lines)) 120 | io_data = self._std_streams.generate_io_data(apply_text) 121 | self._std_streams.write_io_data(io_data) 122 | 123 | def _handle_host_session(self, message): 124 | # Register with rendezvous 125 | session_id = uuid.uuid4() 126 | 
self._send_connect_request(session_id) 127 | 128 | # Inform plugin of session id 129 | session_info = em.serialize(em.SessionInfo(session_id=str(session_id))) 130 | io_data = self._std_streams.generate_io_data(session_info) 131 | self._std_streams.write_io_data(io_data) 132 | 133 | def _handle_join_session(self, message): 134 | # Parse ID to make sure it's a UUID 135 | session_id = uuid.UUID(message.session_id) 136 | self._send_connect_request(session_id) 137 | 138 | def _send_connect_request(self, session_id): 139 | io_data = self._gateway.generate_io_data( 140 | RendezvousProtocolUtils.serialize(ConnectRequest( 141 | session_id=str(session_id), 142 | my_id=str(self._id), 143 | private_address=( 144 | socket.gethostbyname(socket.gethostname()), 145 | self._gateway.get_port(), 146 | ), 147 | )), 148 | RENDEZVOUS_ADDRESS, 149 | ) 150 | self._gateway.write_io_data(io_data) 151 | -------------------------------------------------------------------------------- /plugin/tandem_lib/agent/tandem/agent/protocol/handlers/interagent.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import json 3 | import uuid 4 | import tandem.agent.protocol.messages.editor as em 5 | 6 | from tandem.agent.models.connection import DirectConnection 7 | from tandem.agent.models.connection_state import ConnectionState 8 | from tandem.agent.protocol.messages.interagent import ( 9 | InteragentProtocolMessageType, 10 | InteragentProtocolUtils, 11 | NewOperations, 12 | Bye, 13 | Hello, 14 | PingBack, 15 | ) 16 | from tandem.agent.stores.connection import ConnectionStore 17 | from tandem.agent.utils.hole_punching import HolePunchingUtils 18 | from tandem.shared.models.peer import Peer 19 | from tandem.shared.protocol.handlers.addressed import AddressedHandler 20 | from tandem.shared.utils.static_value import static_value as staticvalue 21 | 22 | 23 | class InteragentProtocolHandler(AddressedHandler): 24 | @staticvalue 25 | def 
    def _handle_pingback(self, message, sender_address):
        """
        Handle a PingBack during hole punching.

        Counts the reply against the candidate address it arrived from;
        once the connection model exposes an active address, promotes the
        connection to SEND_SYN (if we initiated) or WAIT_FOR_SYN.
        """
        peer_id = uuid.UUID(message.id)
        connection = \
            ConnectionStore.get_instance().get_connection_by_id(peer_id)
        # Only count PingBack messages from peers we know about and from whom
        # we expect PingBack messages
        if (connection is None or
                connection.get_connection_state() != ConnectionState.PING):
            return

        logging.debug(
            "Counting ping from {} at {}:{}."
            .format(message.id, *sender_address),
        )
        connection.bump_ping_count(sender_address)

        # When the connection is ready to transition into the SYN/WAIT states,
        # an active address will be available
        if connection.get_active_address() is None:
            return

        # The peer that initiated the connection drives the SYN exchange;
        # the other side waits for it.
        connection.set_connection_state(
            ConnectionState.SEND_SYN
            if connection.initiated_connection()
            else ConnectionState.WAIT_FOR_SYN
        )
        logging.debug(
            "Promoted peer from {} with address {}:{}."
            .format(message.id, *(connection.get_active_address())),
        )

        if connection.get_connection_state() == ConnectionState.SEND_SYN:
            logging.debug(
                "Will send SYN to {} at {}:{}"
                .format(message.id, *(connection.get_active_address())),
            )
            # NOTE(review): this replaces the ping interval handle set
            # during setup - confirm set_interval_handle cancels the
            # previous handle, otherwise the pings keep running.
            connection.set_interval_handle(self._time_scheduler.run_every(
                HolePunchingUtils.SYN_INTERVAL,
                HolePunchingUtils.generate_send_syn(
                    self._gateway,
                    connection.get_active_address(),
                ),
            ))
        else:
            logging.debug(
                "Will wait for SYN from {} at {}:{}"
                .format(message.id, *(connection.get_active_address())),
            )
132 | .format(*(connection.get_active_address())), 133 | ) 134 | 135 | def _handle_hello(self, message, sender_address): 136 | id = uuid.UUID(message.id) 137 | new_connection = DirectConnection(Peer( 138 | id=id, 139 | public_address=sender_address, 140 | )) 141 | ConnectionStore.get_instance().add_connection(new_connection) 142 | logging.info( 143 | "Tandem Agent established a direct connection to {}:{}" 144 | .format(*sender_address), 145 | ) 146 | 147 | if message.should_reply: 148 | io_data = self._gateway.generate_io_data( 149 | InteragentProtocolUtils.serialize(Hello( 150 | id=str(self._id), 151 | should_reply=False, 152 | )), 153 | sender_address, 154 | ) 155 | self._gateway.write_io_data(io_data) 156 | 157 | self._send_all_operations(new_connection) 158 | 159 | def _handle_bye(self, message, sender_address): 160 | connection_store = ConnectionStore.get_instance() 161 | connection = connection_store.get_connection_by_address(sender_address) 162 | if connection is None: 163 | return 164 | connection_store.remove_connection(connection) 165 | 166 | def _handle_new_operations(self, message, sender_address): 167 | connection = ( 168 | ConnectionStore.get_instance() 169 | .get_connection_by_address(sender_address) 170 | ) 171 | if (connection is not None and 172 | connection.get_connection_state() == ConnectionState.SEND_SYN): 173 | connection.set_connection_state(ConnectionState.OPEN) 174 | logging.debug( 175 | "Connection to peer at {}:{} is open." 
176 | .format(*(connection.get_active_address())), 177 | ) 178 | 179 | operations_list = json.loads(message.operations_list) 180 | if len(operations_list) == 0: 181 | return 182 | self._document.enqueue_remote_operations(operations_list) 183 | 184 | if not self._document.write_request_sent(): 185 | io_data = self._std_streams.generate_io_data( 186 | em.serialize(em.WriteRequest(self._next_editor_sequence)), 187 | ) 188 | self._std_streams.write_io_data(io_data) 189 | self._document.set_write_request_sent(True) 190 | logging.debug( 191 | "Sent write request seq: {}" 192 | .format(self._next_editor_sequence), 193 | ) 194 | self._next_editor_sequence += 1 195 | 196 | def _send_all_operations(self, connection, even_if_empty=False): 197 | operations = self._document.get_document_operations() 198 | if not even_if_empty and len(operations) == 0: 199 | return 200 | 201 | payload = InteragentProtocolUtils.serialize(NewOperations( 202 | operations_list=json.dumps(operations) 203 | )) 204 | io_data = self._gateway.generate_io_data( 205 | payload, 206 | connection.get_active_address(), 207 | ) 208 | self._gateway.write_io_data( 209 | io_data, 210 | reliability=True, 211 | ) 212 | 213 | def stop(self): 214 | connections = ConnectionStore.get_instance().get_open_connections() 215 | io_data = self._gateway.generate_io_data( 216 | InteragentProtocolUtils.serialize(Bye()), 217 | [connection.get_active_address() for connection in connections], 218 | ) 219 | self._gateway.write_io_data(io_data) 220 | ConnectionStore.reset_instance() 221 | -------------------------------------------------------------------------------- /plugin/tandem_lib/agent/test_client.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | import random 4 | from subprocess import Popen, PIPE 5 | import tandem.agent.protocol.messages.editor as m 6 | from tandem.agent.configuration import BASE_DIR 7 | 8 | 9 | def start_agent(extra_args=None): 10 | if 
def get_string_ports(low=60600, high=62600):
    """
    Return two consecutive port numbers, as strings, for a pair of agents.

    The first port is drawn uniformly from [low, high]; the second is the
    next port up. The defaults preserve the previously hard-coded range;
    they are now parameters so tests can pin or shift the range.
    """
    starting_port = random.randint(low, high)
    return str(starting_port), str(starting_port + 1)
65 | """ 66 | agent1_port, agent2_port = get_string_ports() 67 | 68 | agent1 = start_agent(["--port", agent1_port]) 69 | agent2 = start_agent([ 70 | "--port", 71 | agent2_port, 72 | "--log-file", 73 | "/tmp/tandem-agent-2.log", 74 | ]) 75 | 76 | # Wait for the agents to start accepting connections 77 | time.sleep(1) 78 | 79 | message = m.ConnectTo("localhost", int(agent1_port)) 80 | agent2.stdin.write(m.serialize(message)) 81 | agent2.stdin.write("\n") 82 | agent2.stdin.flush() 83 | 84 | # Wait for the pings 85 | time.sleep(2) 86 | 87 | agent1.stdin.close() 88 | agent1.terminate() 89 | agent2.stdin.close() 90 | agent2.terminate() 91 | 92 | agent1.wait() 93 | agent2.wait() 94 | 95 | 96 | def text_transfer_test(): 97 | """ 98 | Tests the Milestone 1 flow by starting 2 agents and 99 | transfering text data from one agent to the other. 100 | 101 | 1. Instruct agent 2 to connect to agent 1 102 | 2. Send a "text changed" message to agent 1 103 | (simulating what the plugin would do) 104 | 3. Expect an "apply text" message to be "output" by agent 2 105 | (this would be an instruction to the plugin to change 106 | the editor's text buffer) 107 | """ 108 | agent1_port, agent2_port = get_string_ports() 109 | 110 | agent1 = start_agent(["--port", agent1_port]) 111 | agent2 = start_agent([ 112 | "--port", 113 | agent2_port, 114 | "--log-file", 115 | "/tmp/tandem-agent-2.log", 116 | ]) 117 | 118 | # Wait for the agents to start accepting connections 119 | time.sleep(1) 120 | 121 | message = m.ConnectTo("localhost", int(agent1_port)) 122 | agent2.stdin.write(m.serialize(message)) 123 | agent2.stdin.write("\n") 124 | agent2.stdin.flush() 125 | 126 | # Wait for the pings 127 | time.sleep(2) 128 | 129 | # Simulate a text buffer change - the plugin notifes agent1 that 130 | # the text buffer has changed 131 | send_user_changed(agent1.stdin, ["Hello world!"]) 132 | 133 | # Expect agent2 to receive a ApplyText message 134 | print_raw_message(agent2.stdout) 135 | 136 | # Repeat 137 | 
send_user_changed(agent1.stdin, ["Hello world again!"]) 138 | print_raw_message(agent2.stdout) 139 | 140 | # Shut down the agents 141 | agent1.stdin.close() 142 | agent1.terminate() 143 | agent2.stdin.close() 144 | agent2.terminate() 145 | 146 | agent1.wait() 147 | agent2.wait() 148 | 149 | 150 | def crdt_test(): 151 | """ 152 | Tests the Milestone 2 flow. 153 | 1. Agent 1 makes a local change 154 | 2. Check that Agent 2 received the changes 155 | 3. Repeat 156 | 4. Agent 2 makes a local change 157 | 5. Check that Agent 1 received the changes 158 | """ 159 | agent1_port, agent2_port = get_string_ports() 160 | 161 | agent1 = start_agent(["--port", agent1_port]) 162 | agent2 = start_agent([ 163 | "--port", 164 | agent2_port, 165 | "--log-file", 166 | "/tmp/tandem-agent-2.log", 167 | ]) 168 | 169 | # Wait for the agents to start accepting connections 170 | time.sleep(1) 171 | 172 | message = m.ConnectTo("localhost", int(agent1_port)) 173 | agent2.stdin.write(m.serialize(message)) 174 | agent2.stdin.write("\n") 175 | agent2.stdin.flush() 176 | 177 | # Wait for connection 178 | time.sleep(1) 179 | 180 | # Simulate a text buffer change - the plugin notifes agent1 that 181 | # the text buffer has changed 182 | send_new_patches( 183 | agent1.stdin, 184 | {"row": 0, "column": 0}, 185 | {"row": 0, "column": 0}, 186 | "Hello world!", 187 | ) 188 | print("Agent 1 made an edit") 189 | 190 | # Simulate a text buffer change - the plugin notifes agent1 that 191 | # the text buffer has changed 192 | send_new_patches( 193 | agent1.stdin, 194 | {"row": 0, "column": 12}, 195 | {"row": 0, "column": 0}, 196 | " Hello world again!", 197 | ) 198 | print("Agent 1 made a second edit") 199 | 200 | # The agent should not resend the request write message 201 | time.sleep(1) 202 | 203 | # Expect agent2 to receive a "Request Write" message 204 | print_raw_message(agent2.stdout) 205 | 206 | # Allow the plugin to apply the remote changes 207 | send_request_write_ack(agent2.stdin, 0) 208 | 209 | # 
def hole_punch_test():
    """
    Spins up three agents and exercises the rendezvous flow: agent 1
    hosts a session, then agents 2 and 3 join it by session id.
    """
    port_a, port_b = get_string_ports()
    port_c = str(int(port_b) + 1)

    agent_a = start_agent(["--port", port_a])
    agent_b = start_agent([
        "--port",
        port_b,
        "--log-file",
        "/tmp/tandem-agent-2.log",
    ])
    agent_c = start_agent([
        "--port",
        port_c,
        "--log-file",
        "/tmp/tandem-agent-3.log",
    ])

    # Give the agents time to start up.
    time.sleep(1)

    # Agent A registers a new session with the rendezvous server.
    agent_a.stdin.write(m.serialize(m.HostSession()))
    agent_a.stdin.write("\n")
    agent_a.stdin.flush()

    session_info = extract_message(agent_a.stdout)
    print("Session ID: {}".format(session_info.session_id))

    # The other two agents join using the advertised session id.
    join = m.JoinSession(session_id=session_info.session_id)
    for joining_agent in (agent_b, agent_c):
        joining_agent.stdin.write(m.serialize(join))
        joining_agent.stdin.write("\n")
        joining_agent.stdin.flush()

    # Leave time for hole punching between all pairs.
    time.sleep(5)

    # Shut every agent down and wait for them to exit.
    for agent in (agent_a, agent_b, agent_c):
        agent.stdin.close()
        agent.terminate()
    for agent in (agent_a, agent_b, agent_c):
        agent.wait()
27 | """ 28 | def __init__(self, contents): 29 | self.type = EditorProtocolMessageType.UserChangedEditorText 30 | self.contents = contents 31 | 32 | def to_payload(self): 33 | return { 34 | "contents": self.contents, 35 | } 36 | 37 | @staticmethod 38 | def from_payload(payload): 39 | return UserChangedEditorText(payload["contents"]) 40 | 41 | 42 | class CheckDocumentSync: 43 | """ 44 | Sent by the editor plugin to the agent to 45 | check whether the editor and the crdt have their 46 | document contents in sync 47 | """ 48 | def __init__(self, contents): 49 | self.type = EditorProtocolMessageType.CheckDocumentSync 50 | self.contents = contents 51 | 52 | def to_payload(self): 53 | return { 54 | "contents": self.contents, 55 | } 56 | 57 | @staticmethod 58 | def from_payload(payload): 59 | return CheckDocumentSync(payload["contents"]) 60 | 61 | 62 | class ApplyText: 63 | """ 64 | Sent by the agent to the editor plugin to 65 | notify it that someone else edited the text buffer. 66 | """ 67 | def __init__(self, contents): 68 | self.type = EditorProtocolMessageType.ApplyText 69 | self.contents = contents 70 | 71 | def to_payload(self): 72 | return { 73 | "contents": self.contents, 74 | } 75 | 76 | @staticmethod 77 | def from_payload(payload): 78 | return ApplyText(payload["contents"]) 79 | 80 | 81 | class ConnectTo: 82 | """ 83 | Sent by the plugin to the agent to tell it to connect 84 | to another agent. 85 | """ 86 | def __init__(self, host, port): 87 | self.type = EditorProtocolMessageType.ConnectTo 88 | self.host = host 89 | self.port = port 90 | 91 | def to_payload(self): 92 | return { 93 | "host": self.host, 94 | "port": self.port, 95 | } 96 | 97 | @staticmethod 98 | def from_payload(payload): 99 | return ConnectTo(payload["host"], payload["port"]) 100 | 101 | 102 | class WriteRequest: 103 | """ 104 | Sent by the agent to the plugin to request for the ability 105 | to apply remote operations to the CRDT. 
106 | """ 107 | def __init__(self, seq): 108 | self.type = EditorProtocolMessageType.WriteRequest 109 | self.seq = seq 110 | 111 | def to_payload(self): 112 | return { 113 | "seq": self.seq, 114 | } 115 | 116 | @staticmethod 117 | def from_payload(payload): 118 | return WriteRequest(payload["seq"]) 119 | 120 | 121 | class WriteRequestAck: 122 | """ 123 | Sent by the plugin to the agent in response to a WriteRequest 124 | message to grant it permission to apply remote operations to the CRDT. 125 | 126 | By sending this message the plugin agrees to not allow users 127 | to modify their local buffer until the remote operations have been 128 | sent back to the plugin via an ApplyPatches message. 129 | """ 130 | def __init__(self, seq): 131 | self.type = EditorProtocolMessageType.WriteRequestAck 132 | self.seq = seq 133 | 134 | def to_payload(self): 135 | return { 136 | "seq": self.seq, 137 | } 138 | 139 | @staticmethod 140 | def from_payload(payload): 141 | return WriteRequestAck(payload["seq"]) 142 | 143 | 144 | class NewPatches: 145 | """ 146 | Sent by the plugin to the agent to inform it of changes made by 147 | the user to their local text buffer. 148 | 149 | patch_list should be a list of dictionaries where each dictionary 150 | represents a change that the user made to their local text buffer. 151 | The patches should be ordered such that they are applied in the 152 | correct order when the list is traversed from front to back. 
153 | 154 | Each patch should have the form: 155 | 156 | { 157 | "start": {"row": , "column": }, 158 | "end": {"row": , "column": }, 159 | "text": , 160 | } 161 | """ 162 | def __init__(self, patch_list): 163 | self.type = EditorProtocolMessageType.NewPatches 164 | self.patch_list = patch_list 165 | 166 | def to_payload(self): 167 | return { 168 | "patch_list": self.patch_list 169 | } 170 | 171 | @staticmethod 172 | def from_payload(payload): 173 | return NewPatches(payload["patch_list"]) 174 | 175 | 176 | class ApplyPatches: 177 | """ 178 | Sent by the agent to the plugin to inform it of remote changes 179 | that should be applied to their local text buffer. 180 | 181 | patch_list will be a list of dictionaries where each dictionary 182 | represents a change that some remote user made to the text buffer. 183 | The order of the patches is significant. They should applied in 184 | the order they are found in this message. 185 | 186 | Each patch will have the form: 187 | 188 | { 189 | "oldStart": {"row": , "column": }, 190 | "oldEnd": {"row": , "column": }, 191 | "oldText": , 192 | "newStart": {"row": , "column": }, 193 | "newEnd": {"row": , "column": }, 194 | "newText": , 195 | } 196 | """ 197 | def __init__(self, patch_list): 198 | self.type = EditorProtocolMessageType.ApplyPatches 199 | self.patch_list = patch_list 200 | 201 | def to_payload(self): 202 | return { 203 | "patch_list": self.patch_list 204 | } 205 | 206 | @staticmethod 207 | def from_payload(payload): 208 | return ApplyPatches(payload["patch_list"]) 209 | 210 | 211 | class HostSession: 212 | """ 213 | Sent by the plugin to the agent to ask it to start hosting a new 214 | session. 
215 | """ 216 | def __init__(self): 217 | self.type = EditorProtocolMessageType.HostSession 218 | 219 | def to_payload(self): 220 | return {} 221 | 222 | @staticmethod 223 | def from_payload(payload): 224 | return HostSession() 225 | 226 | 227 | class JoinSession: 228 | """ 229 | Sent by the plugin to the agent to ask it to join an existing 230 | session. 231 | """ 232 | def __init__(self, session_id): 233 | self.type = EditorProtocolMessageType.JoinSession 234 | self.session_id = session_id 235 | 236 | def to_payload(self): 237 | return { 238 | "session_id": str(self.session_id), 239 | } 240 | 241 | @staticmethod 242 | def from_payload(payload): 243 | return JoinSession(payload["session_id"]) 244 | 245 | 246 | class SessionInfo: 247 | """ 248 | Sent by the agent to the plugin to pass it the session ID. 249 | """ 250 | def __init__(self, session_id): 251 | self.type = EditorProtocolMessageType.SessionInfo 252 | self.session_id = session_id 253 | 254 | def to_payload(self): 255 | return { 256 | "session_id": str(self.session_id), 257 | } 258 | 259 | @staticmethod 260 | def from_payload(payload): 261 | return SessionInfo(payload["session_id"]) 262 | 263 | 264 | def serialize(message): 265 | as_dict = { 266 | "type": message.type.value, 267 | "payload": message.to_payload(), 268 | "version": 1, 269 | } 270 | return json.dumps(as_dict) 271 | 272 | 273 | def deserialize(data): 274 | try: 275 | as_dict = json.loads(data) 276 | message_type = as_dict["type"] 277 | payload = as_dict["payload"] 278 | 279 | if message_type == EditorProtocolMessageType.ConnectTo.value: 280 | return ConnectTo.from_payload(payload) 281 | 282 | elif message_type == EditorProtocolMessageType.WriteRequest.value: 283 | return WriteRequest.from_payload(payload) 284 | 285 | elif message_type == EditorProtocolMessageType.WriteRequestAck.value: 286 | return WriteRequestAck.from_payload(payload) 287 | 288 | elif message_type == \ 289 | EditorProtocolMessageType.UserChangedEditorText.value: 290 | return 
UserChangedEditorText.from_payload(payload) 291 | 292 | elif message_type == EditorProtocolMessageType.ApplyText.value: 293 | return ApplyText.from_payload(payload) 294 | 295 | elif message_type == EditorProtocolMessageType.NewPatches.value: 296 | return NewPatches.from_payload(payload) 297 | 298 | elif message_type == EditorProtocolMessageType.ApplyPatches.value: 299 | return ApplyPatches.from_payload(payload) 300 | 301 | elif message_type == EditorProtocolMessageType.CheckDocumentSync.value: 302 | return CheckDocumentSync.from_payload(payload) 303 | 304 | elif message_type == EditorProtocolMessageType.HostSession.value: 305 | return HostSession.from_payload(payload) 306 | 307 | elif message_type == EditorProtocolMessageType.JoinSession.value: 308 | return JoinSession.from_payload(payload) 309 | 310 | elif message_type == EditorProtocolMessageType.SessionInfo.value: 311 | return SessionInfo.from_payload(payload) 312 | 313 | else: 314 | raise EditorProtocolMarshalError 315 | 316 | except: 317 | raise EditorProtocolMarshalError 318 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. 
For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 
175 | 176 | END OF TERMS AND CONDITIONS 177 | -------------------------------------------------------------------------------- /plugin/tandem_lib/tandem_plugin.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import random 4 | from time import sleep 5 | 6 | from subprocess import Popen, PIPE 7 | from threading import Thread, Event 8 | 9 | from diff_match_patch import diff_match_patch 10 | import agent.tandem.agent.protocol.messages.editor as m 11 | from agent.tandem.agent.configuration import BASE_DIR 12 | 13 | DEBUG = True 14 | is_active = False 15 | patch = diff_match_patch() 16 | 17 | 18 | def spawn_agent(extra_args=None): 19 | if extra_args is None: 20 | extra_args = [] 21 | return Popen( 22 | ["python3", os.path.join(BASE_DIR, "main.py")] + extra_args, 23 | stdin=PIPE, 24 | stdout=PIPE, 25 | ) 26 | 27 | 28 | def get_string_port(): 29 | starting_port = random.randint(60600, 62600) 30 | return str(starting_port) 31 | 32 | 33 | def index_to_point(buffer_line_lengths, index): 34 | index_left = index 35 | for i in range(len(buffer_line_lengths)): 36 | if index_left >= buffer_line_lengths[i] + 1: 37 | index_left -= buffer_line_lengths[i] + 1 38 | else: 39 | return (i, index_left) 40 | 41 | 42 | def error(): 43 | print "An error occurred." 
44 | global DEBUG 45 | if DEBUG: 46 | raise 47 | 48 | 49 | class TandemPlugin(object): 50 | def __init__(self, vim, message_handler, on_start=lambda: None): 51 | self._vim = vim 52 | self._message_handler = message_handler 53 | self._on_start = on_start 54 | 55 | def _initialize(self): 56 | self._buffer_contents = [''] 57 | 58 | if self._connect_to is not None: 59 | self._vim.command('enew') 60 | 61 | self._output_checker = Thread(target=self._agent_listener) 62 | 63 | self._text_applied = Event() 64 | 65 | def _start_agent(self): 66 | self._agent_port = get_string_port() 67 | self._agent = spawn_agent([ 68 | "--port", 69 | self._agent_port, 70 | "--log-file", 71 | "/tmp/tandem-agent-{}.log".format(self._agent_port), 72 | ]) 73 | self._agent_stdout_iter = iter(self._agent.stdout.readline, b"") 74 | 75 | if self._connect_to is not None: 76 | message = m.JoinSession(self._connect_to) 77 | else: 78 | message = m.HostSession() 79 | self._agent.stdin.write(m.serialize(message)) 80 | self._agent.stdin.write("\n") 81 | self._agent.stdin.flush() 82 | 83 | self._output_checker.start() 84 | 85 | def _check_document_sync(self): 86 | global is_active 87 | while is_active: 88 | with self._read_write_check: 89 | if not is_active: 90 | break 91 | 92 | target_buffer_contents = self._target_buffer[:] 93 | message = m.CheckDocumentSync(target_buffer_contents) 94 | 95 | self._agent.stdin.write(m.serialize(message)) 96 | self._agent.stdin.write("\n") 97 | self._agent.stdin.flush() 98 | 99 | sleep(0.5) 100 | 101 | def _shut_down_agent(self): 102 | self._agent_stdout_iter = None 103 | self._agent.stdin.close() 104 | self._agent.terminate() 105 | self._agent.wait() 106 | 107 | def check_buffer(self): 108 | global is_active 109 | if not is_active: 110 | return 111 | 112 | target_buffer_contents = self._target_buffer[:] 113 | 114 | if len(target_buffer_contents) != len(self._buffer_contents): 115 | self._send_patches(target_buffer_contents) 116 | else: 117 | for i in 
range(len(target_buffer_contents)): 118 | if target_buffer_contents[i] != self._buffer_contents[i]: 119 | self._send_patches(target_buffer_contents) 120 | break 121 | 122 | self._buffer_contents = target_buffer_contents 123 | 124 | def _create_patch(self, start, end, text): 125 | if start is None or end is None or text is None: 126 | # Raise an error if in debug mode, otherwise return None 127 | if DEBUG: 128 | raise ValueError("Start, end, or text is None!") 129 | else: 130 | return None 131 | 132 | result = [] 133 | 134 | if not (start[0] == end[0] and start[1] == end[1]): 135 | result.append({ 136 | "start": {"row": start[0], "column": start[1]}, 137 | "end": {"row": end[0], "column": end[1]}, 138 | "text": "", 139 | }) 140 | 141 | if text: 142 | result.append({ 143 | "start": {"row": start[0], "column": start[1]}, 144 | "end": {"row": 0, "column": 0}, 145 | "text": text, 146 | }) 147 | 148 | return result 149 | 150 | def _send_patches(self, target_buffer_contents): 151 | try : 152 | prev_contents = os.linesep.join(self._buffer_contents) 153 | curr_contents = os.linesep.join(target_buffer_contents) 154 | diff_patches = patch.patch_make(prev_contents, curr_contents) 155 | 156 | patches = [] 157 | length_buffer = [len(x) for x in self._buffer_contents] 158 | 159 | for p in diff_patches: 160 | start_index = p.start1 161 | end_index = p.start1 + p.length1 162 | 163 | start_index_offset = 0 164 | end_index_offset = 0 165 | 166 | while(len(p.diffs)): 167 | (op, data) = p.diffs[0] 168 | if (op != diff_match_patch.DIFF_EQUAL): 169 | break 170 | start_index_offset = start_index_offset + len(data) 171 | p.diffs.pop(0) 172 | 173 | while(len(p.diffs)): 174 | (op, data) = p.diffs[-1] 175 | if (op != diff_match_patch.DIFF_EQUAL): 176 | break 177 | end_index_offset = end_index_offset + len(data) 178 | p.diffs.pop() 179 | 180 | start_rc = index_to_point(length_buffer, start_index + start_index_offset) 181 | end_rc = index_to_point(length_buffer, end_index - end_index_offset) 
182 | 183 | text = [] 184 | 185 | for (op, data) in p.diffs: 186 | if op == diff_match_patch.DIFF_INSERT or op == diff_match_patch.DIFF_EQUAL: 187 | text.append(data) 188 | 189 | text = "".join(text) 190 | 191 | text_lengths = [len(word) for word in text.split(os.linesep)] 192 | 193 | if start_rc[0] == end_rc[0]: 194 | length_buffer[start_rc[0]] += text_lengths[0] 195 | length_buffer[start_rc[0]] -= end_rc[1] - start_rc[1] 196 | length_buffer[start_rc[0] + 1 : start_rc[0] + 1] = text_lengths[1:] 197 | else: 198 | if len(text_lengths) > 1: 199 | length_buffer[start_rc[0]] = start_rc[1] + text_lengths[0] 200 | length_buffer[end_rc[0]] = length_buffer[end_rc[0]] - end_rc[1] + text_lengths[-1] 201 | length_buffer[start_rc[0] + 1 : end_rc[0]] = text_lengths[1:-1] 202 | else: 203 | length_buffer[start_rc[0]] = start_rc[1] + text_lengths[0] + length_buffer[end_rc[0]] - end_rc[1] 204 | length_buffer[start_rc[0] + 1 : end_rc[0] + 1] = [] 205 | 206 | patches.extend( 207 | self._create_patch(start_rc, end_rc, text) 208 | ) 209 | 210 | patches = [p for p in patches if p is not None] 211 | if len(patches) > 0: 212 | message = m.NewPatches(patches) 213 | self._agent.stdin.write(m.serialize(message)) 214 | 215 | self._agent.stdin.write("\n") 216 | self._agent.stdin.flush() 217 | except: 218 | error() 219 | 220 | def _agent_listener(self): 221 | while True: 222 | message = self._read_message() 223 | if message is None: 224 | break 225 | self._handle_message(message) 226 | 227 | def _read_message(self): 228 | try: 229 | binary_line = next(self._agent_stdout_iter) 230 | line = binary_line.decode("utf-8") 231 | return m.deserialize(line) 232 | except StopIteration: 233 | return None 234 | 235 | def handle_apply_text(self, message): 236 | self._target_buffer[:] = message.contents 237 | self._buffer_contents = self._target_buffer[:] 238 | self._vim.command(":redraw") 239 | 240 | def handle_write_request(self, message): 241 | # Flush out any non-diff'd changes first 242 | 
self.check_buffer() 243 | 244 | # Allow agent to apply remote operations 245 | self._agent.stdin.write(m.serialize(m.WriteRequestAck(message.seq))) 246 | self._agent.stdin.write("\n") 247 | self._agent.stdin.flush() 248 | 249 | # Apply results of the remote operations 250 | apply_patches_message = self._read_message() 251 | if not isinstance(apply_patches_message, m.ApplyPatches): 252 | raise ValueError("Invalid protocol message!") 253 | self._handle_apply_patches(apply_patches_message) 254 | 255 | def _handle_apply_patches(self, message): 256 | for patch in message.patch_list: 257 | start = patch["oldStart"] 258 | end = patch["oldEnd"] 259 | text = patch["newText"] 260 | 261 | target_buffer_contents = self._target_buffer[:] 262 | 263 | before_in_new_line = target_buffer_contents[start["row"]][:start["column"]] 264 | after_in_new_line = target_buffer_contents[end["row"]][end["column"]:] 265 | 266 | new_lines = text.split(os.linesep) 267 | if len(new_lines) > 0: 268 | new_lines[0] = before_in_new_line + new_lines[0] 269 | else: 270 | new_lines = [before_in_new_line] 271 | 272 | new_lines[-1] = new_lines[-1] + after_in_new_line 273 | 274 | self._target_buffer[start["row"] : end["row"] + 1] = new_lines 275 | 276 | self._buffer_contents = self._target_buffer[:] 277 | self._vim.command(":redraw") 278 | 279 | def _handle_message(self, message): 280 | self._message_handler(message) 281 | 282 | def start(self, session_id=None): 283 | global is_active 284 | if is_active: 285 | print "Cannot start. An instance is already running on :{}".format(self._agent_port) 286 | return 287 | 288 | self._connect_to = session_id 289 | 290 | self._target_buffer = self._vim.current.buffer 291 | 292 | if self._target_buffer.options['modified'] and self._connect_to is not None: 293 | print "Cannot start. 
There are unsaved changes in this buffer" 294 | return 295 | 296 | self._initialize() 297 | 298 | self._start_agent() 299 | is_active = True 300 | 301 | self._on_start() 302 | 303 | if self._connect_to is None: 304 | self.check_buffer() 305 | 306 | def stop(self, invoked_from_autocmd=True): 307 | global is_active 308 | if not is_active: 309 | if not invoked_from_autocmd: 310 | print "No instance running." 311 | return 312 | 313 | is_active = False 314 | 315 | self._shut_down_agent() 316 | 317 | if self._output_checker.isAlive(): 318 | self._output_checker.join() 319 | -------------------------------------------------------------------------------- /plugin/tandem_lib/diff_match_patch.py: -------------------------------------------------------------------------------- 1 | import re 2 | import sys 3 | import time 4 | 5 | __author__ = 'fraser@google.com (Neil Fraser)' 6 | 7 | """Diff Match and Patch 8 | 9 | Copyright 2006 Google Inc. 10 | http://code.google.com/p/google-diff-match-patch/ 11 | 12 | Licensed under the Apache License, Version 2.0 (the "License"); 13 | you may not use this file except in compliance with the License. 14 | You may obtain a copy of the License at 15 | 16 | http://www.apache.org/licenses/LICENSE-2.0 17 | 18 | Unless required by applicable law or agreed to in writing, software 19 | distributed under the License is distributed on an "AS IS" BASIS, 20 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 21 | See the License for the specific language governing permissions and 22 | limitations under the License. 23 | """ 24 | 25 | """Functions for diff, match and patch. 26 | 27 | Computes the difference between two texts to create a patch. 28 | Applies the patch onto another text, allowing for errors. 
29 | """ 30 | 31 | try: 32 | from urllib import parse 33 | assert parse 34 | 35 | def unquote_py3(x): 36 | return parse.unquote(x) 37 | unquote = unquote_py3 38 | str_instances = str 39 | unichr = chr 40 | except ImportError: 41 | import urllib as parse 42 | 43 | def unquote_py2(x): 44 | return parse.unquote(x.encode('utf-8')).decode('utf-8') 45 | unquote = unquote_py2 46 | import __builtin__ 47 | str_instances = (str, __builtin__.basestring) 48 | 49 | 50 | class diff_match_patch: 51 | """Class containing the diff, match and patch methods. 52 | 53 | Also contains the behaviour settings. 54 | """ 55 | 56 | def __init__(self): 57 | """Inits a diff_match_patch object with default settings. 58 | Redefine these in your program to override the defaults. 59 | """ 60 | 61 | # Number of seconds to map a diff before giving up (0 for infinity). 62 | self.Diff_Timeout = 1.0 63 | # Cost of an empty edit operation in terms of edit characters. 64 | self.Diff_EditCost = 4 65 | # At what point is no match declared (0.0 = perfection, 1.0 = very loose). 66 | # self.Match_Threshold = 0.5 67 | self.Match_Threshold = 0.375 68 | # How far to search for a match (0 = exact location, 1000+ = broad match). 69 | # A match this many characters away from the expected location will add 70 | # 1.0 to the score (0.0 is a perfect match). 71 | # self.Match_Distance = 1000 72 | self.Match_Distance = 100 73 | # When deleting a large block of text (over ~64 characters), how close do 74 | # the contents have to be to match the expected contents. (0.0 = perfection, 75 | # 1.0 = very loose). Note that Match_Threshold controls how closely the 76 | # end points of a delete need to match. 77 | # self.Patch_DeleteThreshold = 0.5 78 | self.Patch_DeleteThreshold = 0.375 79 | # Chunk size for context length. 80 | self.Patch_Margin = 4 81 | 82 | # The number of bits in an int. 83 | # Python has no maximum, thus to disable patch splitting set to 0. 
84 | # However to avoid long patches in certain pathological cases, use 32. 85 | # Multiple short patches (using native ints) are much faster than long ones. 86 | self.Match_MaxBits = 32 87 | 88 | # DIFF FUNCTIONS 89 | 90 | # The data structure representing a diff is an array of tuples: 91 | # [(DIFF_DELETE, "Hello"), (DIFF_INSERT, "Goodbye"), (DIFF_EQUAL, " world.")] 92 | # which means: delete "Hello", add "Goodbye" and keep " world." 93 | DIFF_DELETE = -1 94 | DIFF_INSERT = 1 95 | DIFF_EQUAL = 0 96 | 97 | def diff_main(self, text1, text2, checklines=True, deadline=None): 98 | """Find the differences between two texts. Simplifies the problem by 99 | stripping any common prefix or suffix off the texts before diffing. 100 | 101 | Args: 102 | text1: Old string to be diffed. 103 | text2: New string to be diffed. 104 | checklines: Optional speedup flag. If present and false, then don't run 105 | a line-level diff first to identify the changed areas. 106 | Defaults to true, which does a faster, slightly less optimal diff. 107 | deadline: Optional time when the diff should be complete by. Used 108 | internally for recursive calls. Users should set DiffTimeout instead. 109 | 110 | Returns: 111 | Array of changes. 112 | """ 113 | # Set a deadline by which time the diff must be complete. 114 | if deadline is None: 115 | # Unlike in most languages, Python counts time in seconds. 116 | if self.Diff_Timeout <= 0: 117 | deadline = sys.maxsize 118 | else: 119 | deadline = time.time() + self.Diff_Timeout 120 | 121 | # Check for null inputs. 122 | if text1 is None or text2 is None: 123 | raise ValueError("Null inputs. (diff_main)") 124 | 125 | # Check for equality (speedup). 126 | if text1 == text2: 127 | if text1: 128 | return [(self.DIFF_EQUAL, text1)] 129 | return [] 130 | 131 | # Trim off common prefix (speedup). 
132 | commonlength = self.diff_commonPrefix(text1, text2) 133 | commonprefix = text1[:commonlength] 134 | text1 = text1[commonlength:] 135 | text2 = text2[commonlength:] 136 | 137 | # Trim off common suffix (speedup). 138 | commonlength = self.diff_commonSuffix(text1, text2) 139 | if commonlength == 0: 140 | commonsuffix = '' 141 | else: 142 | commonsuffix = text1[-commonlength:] 143 | text1 = text1[:-commonlength] 144 | text2 = text2[:-commonlength] 145 | 146 | # Compute the diff on the middle block. 147 | diffs = self.diff_compute(text1, text2, checklines, deadline) 148 | 149 | # Restore the prefix and suffix. 150 | if commonprefix: 151 | diffs[:0] = [(self.DIFF_EQUAL, commonprefix)] 152 | if commonsuffix: 153 | diffs.append((self.DIFF_EQUAL, commonsuffix)) 154 | self.diff_cleanupMerge(diffs) 155 | return diffs 156 | 157 | def diff_compute(self, text1, text2, checklines, deadline): 158 | """Find the differences between two texts. Assumes that the texts do not 159 | have any common prefix or suffix. 160 | 161 | Args: 162 | text1: Old string to be diffed. 163 | text2: New string to be diffed. 164 | checklines: Speedup flag. If false, then don't run a line-level diff 165 | first to identify the changed areas. 166 | If true, then run a faster, slightly less optimal diff. 167 | deadline: Time when the diff should be complete by. 168 | 169 | Returns: 170 | Array of changes. 171 | """ 172 | if not text1: 173 | # Just add some text (speedup). 174 | return [(self.DIFF_INSERT, text2)] 175 | 176 | if not text2: 177 | # Just delete some text (speedup). 178 | return [(self.DIFF_DELETE, text1)] 179 | 180 | if len(text1) > len(text2): 181 | (longtext, shorttext) = (text1, text2) 182 | else: 183 | (shorttext, longtext) = (text1, text2) 184 | i = longtext.find(shorttext) 185 | if i != -1: 186 | # Shorter text is inside the longer text (speedup). 
187 | diffs = [(self.DIFF_INSERT, longtext[:i]), (self.DIFF_EQUAL, shorttext), 188 | (self.DIFF_INSERT, longtext[i + len(shorttext):])] 189 | # Swap insertions for deletions if diff is reversed. 190 | if len(text1) > len(text2): 191 | diffs[0] = (self.DIFF_DELETE, diffs[0][1]) 192 | diffs[2] = (self.DIFF_DELETE, diffs[2][1]) 193 | return diffs 194 | 195 | if len(shorttext) == 1: 196 | # Single character string. 197 | # After the previous speedup, the character can't be an equality. 198 | return [(self.DIFF_DELETE, text1), (self.DIFF_INSERT, text2)] 199 | longtext = shorttext = None # Garbage collect. 200 | 201 | # Check to see if the problem can be split in two. 202 | hm = self.diff_halfMatch(text1, text2) 203 | if hm: 204 | # A half-match was found, sort out the return data. 205 | (text1_a, text1_b, text2_a, text2_b, mid_common) = hm 206 | # Send both pairs off for separate processing. 207 | diffs_a = self.diff_main(text1_a, text2_a, checklines, deadline) 208 | diffs_b = self.diff_main(text1_b, text2_b, checklines, deadline) 209 | # Merge the results. 210 | return diffs_a + [(self.DIFF_EQUAL, mid_common)] + diffs_b 211 | 212 | if checklines and len(text1) > 100 and len(text2) > 100: 213 | return self.diff_lineMode(text1, text2, deadline) 214 | 215 | return self.diff_bisect(text1, text2, deadline) 216 | 217 | def diff_lineMode(self, text1, text2, deadline): 218 | """Do a quick line-level diff on both strings, then rediff the parts for 219 | greater accuracy. 220 | This speedup can produce non-minimal diffs. 221 | 222 | Args: 223 | text1: Old string to be diffed. 224 | text2: New string to be diffed. 225 | deadline: Time when the diff should be complete by. 226 | 227 | Returns: 228 | Array of changes. 229 | """ 230 | 231 | # Scan the text on a line-by-line basis first. 232 | (text1, text2, linearray) = self.diff_linesToChars(text1, text2) 233 | 234 | diffs = self.diff_main(text1, text2, False, deadline) 235 | 236 | # Convert the diff back to original text. 
237 | self.diff_charsToLines(diffs, linearray) 238 | # Eliminate freak matches (e.g. blank lines) 239 | self.diff_cleanupSemantic(diffs) 240 | 241 | # Rediff any replacement blocks, this time character-by-character. 242 | # Add a dummy entry at the end. 243 | diffs.append((self.DIFF_EQUAL, '')) 244 | pointer = 0 245 | count_delete = 0 246 | count_insert = 0 247 | text_delete = '' 248 | text_insert = '' 249 | while pointer < len(diffs): 250 | if diffs[pointer][0] == self.DIFF_INSERT: 251 | count_insert += 1 252 | text_insert += diffs[pointer][1] 253 | elif diffs[pointer][0] == self.DIFF_DELETE: 254 | count_delete += 1 255 | text_delete += diffs[pointer][1] 256 | elif diffs[pointer][0] == self.DIFF_EQUAL: 257 | # Upon reaching an equality, check for prior redundancies. 258 | if count_delete >= 1 and count_insert >= 1: 259 | # Delete the offending records and add the merged ones. 260 | a = self.diff_main(text_delete, text_insert, False, deadline) 261 | diffs[(pointer - count_delete - count_insert):pointer] = a 262 | pointer = pointer - count_delete - count_insert + len(a) 263 | count_insert = 0 264 | count_delete = 0 265 | text_delete = '' 266 | text_insert = '' 267 | 268 | pointer += 1 269 | 270 | diffs.pop() # Remove the dummy entry at the end. 271 | 272 | return diffs 273 | 274 | def diff_bisect(self, text1, text2, deadline): 275 | """Find the 'middle snake' of a diff, split the problem in two 276 | and return the recursively constructed diff. 277 | See Myers 1986 paper: An O(ND) Difference Algorithm and Its Variations. 278 | 279 | Args: 280 | text1: Old string to be diffed. 281 | text2: New string to be diffed. 282 | deadline: Time at which to bail if not yet complete. 283 | 284 | Returns: 285 | Array of diff tuples. 286 | """ 287 | 288 | # Cache the text lengths to prevent multiple calls. 
289 | text1_length = len(text1) 290 | text2_length = len(text2) 291 | max_d = (text1_length + text2_length + 1) // 2 292 | v_offset = max_d 293 | v_length = 2 * max_d 294 | v1 = [-1] * v_length 295 | v1[v_offset + 1] = 0 296 | v2 = v1[:] 297 | delta = text1_length - text2_length 298 | # If the total number of characters is odd, then the front path will 299 | # collide with the reverse path. 300 | front = (delta % 2 != 0) 301 | # Offsets for start and end of k loop. 302 | # Prevents mapping of space beyond the grid. 303 | k1start = 0 304 | k1end = 0 305 | k2start = 0 306 | k2end = 0 307 | for d in range(max_d): 308 | # Bail out if deadline is reached. 309 | if time.time() > deadline: 310 | break 311 | 312 | # Walk the front path one step. 313 | for k1 in range(-d + k1start, d + 1 - k1end, 2): 314 | k1_offset = v_offset + k1 315 | if k1 == -d or (k1 != d and v1[k1_offset - 1] < v1[k1_offset + 1]): 316 | x1 = v1[k1_offset + 1] 317 | else: 318 | x1 = v1[k1_offset - 1] + 1 319 | y1 = x1 - k1 320 | while (x1 < text1_length and y1 < text2_length and text1[x1] == text2[y1]): 321 | x1 += 1 322 | y1 += 1 323 | v1[k1_offset] = x1 324 | if x1 > text1_length: 325 | # Ran off the right of the graph. 326 | k1end += 2 327 | elif y1 > text2_length: 328 | # Ran off the bottom of the graph. 329 | k1start += 2 330 | elif front: 331 | k2_offset = v_offset + delta - k1 332 | if k2_offset >= 0 and k2_offset < v_length and v2[k2_offset] != -1: 333 | # Mirror x2 onto top-left coordinate system. 334 | x2 = text1_length - v2[k2_offset] 335 | if x1 >= x2: 336 | # Overlap detected. 337 | return self.diff_bisectSplit(text1, text2, x1, y1, deadline) 338 | 339 | # Walk the reverse path one step. 
340 | for k2 in range(-d + k2start, d + 1 - k2end, 2): 341 | k2_offset = v_offset + k2 342 | if k2 == -d or (k2 != d and v2[k2_offset - 1] < v2[k2_offset + 1]): 343 | x2 = v2[k2_offset + 1] 344 | else: 345 | x2 = v2[k2_offset - 1] + 1 346 | y2 = x2 - k2 347 | while (x2 < text1_length and y2 < text2_length and text1[-x2 - 1] == text2[-y2 - 1]): 348 | x2 += 1 349 | y2 += 1 350 | v2[k2_offset] = x2 351 | if x2 > text1_length: 352 | # Ran off the left of the graph. 353 | k2end += 2 354 | elif y2 > text2_length: 355 | # Ran off the top of the graph. 356 | k2start += 2 357 | elif not front: 358 | k1_offset = v_offset + delta - k2 359 | if k1_offset >= 0 and k1_offset < v_length and v1[k1_offset] != -1: 360 | x1 = v1[k1_offset] 361 | y1 = v_offset + x1 - k1_offset 362 | # Mirror x2 onto top-left coordinate system. 363 | x2 = text1_length - x2 364 | if x1 >= x2: 365 | # Overlap detected. 366 | return self.diff_bisectSplit(text1, text2, x1, y1, deadline) 367 | 368 | # Diff took too long and hit the deadline or 369 | # number of diffs equals number of characters, no commonality at all. 370 | return [(self.DIFF_DELETE, text1), (self.DIFF_INSERT, text2)] 371 | 372 | def diff_bisectSplit(self, text1, text2, x, y, deadline): 373 | """Given the location of the 'middle snake', split the diff in two parts 374 | and recurse. 375 | 376 | Args: 377 | text1: Old string to be diffed. 378 | text2: New string to be diffed. 379 | x: Index of split point in text1. 380 | y: Index of split point in text2. 381 | deadline: Time at which to bail if not yet complete. 382 | 383 | Returns: 384 | Array of diff tuples. 385 | """ 386 | text1a = text1[:x] 387 | text2a = text2[:y] 388 | text1b = text1[x:] 389 | text2b = text2[y:] 390 | 391 | # Compute both diffs serially. 
392 | diffs = self.diff_main(text1a, text2a, False, deadline) 393 | diffsb = self.diff_main(text1b, text2b, False, deadline) 394 | 395 | return diffs + diffsb 396 | 397 | def diff_linesToChars(self, text1, text2): 398 | """Split two texts into an array of strings. Reduce the texts to a string 399 | of hashes where each Unicode character represents one line. 400 | 401 | Args: 402 | text1: First string. 403 | text2: Second string. 404 | 405 | Returns: 406 | Three element tuple, containing the encoded text1, the encoded text2 and 407 | the array of unique strings. The zeroth element of the array of unique 408 | strings is intentionally blank. 409 | """ 410 | lineArray = [] # e.g. lineArray[4] == "Hello\n" 411 | lineHash = {} # e.g. lineHash["Hello\n"] == 4 412 | 413 | # "\x00" is a valid character, but various debuggers don't like it. 414 | # So we'll insert a junk entry to avoid generating a null character. 415 | lineArray.append('') 416 | 417 | def diff_linesToCharsMunge(text): 418 | """Split a text into an array of strings. Reduce the texts to a string 419 | of hashes where each Unicode character represents one line. 420 | Modifies linearray and linehash through being a closure. 421 | 422 | Args: 423 | text: String to encode. 424 | 425 | Returns: 426 | Encoded string. 427 | """ 428 | chars = [] 429 | # Walk the text, pulling out a substring for each line. 430 | # text.split('\n') would would temporarily double our memory footprint. 431 | # Modifying text would create many large strings to garbage collect. 
432 | lineStart = 0 433 | lineEnd = -1 434 | while lineEnd < len(text) - 1: 435 | lineEnd = text.find('\n', lineStart) 436 | if lineEnd == -1: 437 | lineEnd = len(text) - 1 438 | line = text[lineStart:lineEnd + 1] 439 | lineStart = lineEnd + 1 440 | 441 | if line in lineHash: 442 | chars.append(unichr(lineHash[line])) 443 | else: 444 | lineArray.append(line) 445 | lineHash[line] = len(lineArray) - 1 446 | chars.append(unichr(len(lineArray) - 1)) 447 | return "".join(chars) 448 | 449 | chars1 = diff_linesToCharsMunge(text1) 450 | chars2 = diff_linesToCharsMunge(text2) 451 | return (chars1, chars2, lineArray) 452 | 453 | def diff_charsToLines(self, diffs, lineArray): 454 | """Rehydrate the text in a diff from a string of line hashes to real lines 455 | of text. 456 | 457 | Args: 458 | diffs: Array of diff tuples. 459 | lineArray: Array of unique strings. 460 | """ 461 | for x in range(len(diffs)): 462 | text = [] 463 | for char in diffs[x][1]: 464 | text.append(lineArray[ord(char)]) 465 | diffs[x] = (diffs[x][0], "".join(text)) 466 | 467 | def diff_commonPrefix(self, text1, text2): 468 | """Determine the common prefix of two strings. 469 | 470 | Args: 471 | text1: First string. 472 | text2: Second string. 473 | 474 | Returns: 475 | The number of characters common to the start of each string. 476 | """ 477 | # Quick check for common null cases. 478 | if not text1 or not text2 or text1[0] != text2[0]: 479 | return 0 480 | # Binary search. 
481 | # Performance analysis: http://neil.fraser.name/news/2007/10/09/ 482 | pointermin = 0 483 | pointermax = min(len(text1), len(text2)) 484 | pointermid = pointermax 485 | pointerstart = 0 486 | while pointermin < pointermid: 487 | if text1[pointerstart:pointermid] == text2[pointerstart:pointermid]: 488 | pointermin = pointermid 489 | pointerstart = pointermin 490 | else: 491 | pointermax = pointermid 492 | pointermid = (pointermax - pointermin) // 2 + pointermin 493 | return pointermid 494 | 495 | def diff_commonSuffix(self, text1, text2): 496 | """Determine the common suffix of two strings. 497 | 498 | Args: 499 | text1: First string. 500 | text2: Second string. 501 | 502 | Returns: 503 | The number of characters common to the end of each string. 504 | """ 505 | # Quick check for common null cases. 506 | if not text1 or not text2 or text1[-1] != text2[-1]: 507 | return 0 508 | # Binary search. 509 | # Performance analysis: http://neil.fraser.name/news/2007/10/09/ 510 | pointermin = 0 511 | pointermax = min(len(text1), len(text2)) 512 | pointermid = pointermax 513 | pointerend = 0 514 | while pointermin < pointermid: 515 | if text1[-pointermid:len(text1) - pointerend] == text2[-pointermid:len(text2) - pointerend]: 516 | pointermin = pointermid 517 | pointerend = pointermin 518 | else: 519 | pointermax = pointermid 520 | pointermid = (pointermax - pointermin) // 2 + pointermin 521 | return pointermid 522 | 523 | def diff_commonOverlap(self, text1, text2): 524 | """Determine if the suffix of one string is the prefix of another. 525 | 526 | Args: 527 | text1 First string. 528 | text2 Second string. 529 | 530 | Returns: 531 | The number of characters common to the end of the first 532 | string and the start of the second string. 533 | """ 534 | # Cache the text lengths to prevent multiple calls. 535 | text1_length = len(text1) 536 | text2_length = len(text2) 537 | # Eliminate the null case. 
538 | if text1_length == 0 or text2_length == 0: 539 | return 0 540 | # Truncate the longer string. 541 | if text1_length > text2_length: 542 | text1 = text1[-text2_length:] 543 | elif text1_length < text2_length: 544 | text2 = text2[:text1_length] 545 | text_length = min(text1_length, text2_length) 546 | # Quick check for the worst case. 547 | if text1 == text2: 548 | return text_length 549 | 550 | # Start by looking for a single character match 551 | # and increase length until no match is found. 552 | # Performance analysis: http://neil.fraser.name/news/2010/11/04/ 553 | best = 0 554 | length = 1 555 | while True: 556 | pattern = text1[-length:] 557 | found = text2.find(pattern) 558 | if found == -1: 559 | return best 560 | length += found 561 | if found == 0 or text1[-length:] == text2[:length]: 562 | best = length 563 | length += 1 564 | 565 | def diff_halfMatch(self, text1, text2): 566 | """Do the two texts share a substring which is at least half the length of 567 | the longer text? 568 | This speedup can produce non-minimal diffs. 569 | 570 | Args: 571 | text1: First string. 572 | text2: Second string. 573 | 574 | Returns: 575 | Five element Array, containing the prefix of text1, the suffix of text1, 576 | the prefix of text2, the suffix of text2 and the common middle. Or None 577 | if there was no match. 578 | """ 579 | if self.Diff_Timeout <= 0: 580 | # Don't risk returning a non-optimal diff if we have unlimited time. 581 | return None 582 | if len(text1) > len(text2): 583 | (longtext, shorttext) = (text1, text2) 584 | else: 585 | (shorttext, longtext) = (text1, text2) 586 | if len(longtext) < 4 or len(shorttext) * 2 < len(longtext): 587 | return None # Pointless. 588 | 589 | def diff_halfMatchI(longtext, shorttext, i): 590 | """Does a substring of shorttext exist within longtext such that the 591 | substring is at least half the length of longtext? 592 | Closure, but does not reference any external variables. 
593 | 594 | Args: 595 | longtext: Longer string. 596 | shorttext: Shorter string. 597 | i: Start index of quarter length substring within longtext. 598 | 599 | Returns: 600 | Five element Array, containing the prefix of longtext, the suffix of 601 | longtext, the prefix of shorttext, the suffix of shorttext and the 602 | common middle. Or None if there was no match. 603 | """ 604 | seed = longtext[i:i + len(longtext) // 4] 605 | best_common = '' 606 | j = shorttext.find(seed) 607 | while j != -1: 608 | prefixLength = self.diff_commonPrefix(longtext[i:], shorttext[j:]) 609 | suffixLength = self.diff_commonSuffix(longtext[:i], shorttext[:j]) 610 | if len(best_common) < suffixLength + prefixLength: 611 | best_common = (shorttext[j - suffixLength:j] + shorttext[j:j + prefixLength]) 612 | best_longtext_a = longtext[:i - suffixLength] 613 | best_longtext_b = longtext[i + prefixLength:] 614 | best_shorttext_a = shorttext[:j - suffixLength] 615 | best_shorttext_b = shorttext[j + prefixLength:] 616 | j = shorttext.find(seed, j + 1) 617 | 618 | if len(best_common) * 2 >= len(longtext): 619 | return (best_longtext_a, best_longtext_b, 620 | best_shorttext_a, best_shorttext_b, best_common) 621 | else: 622 | return None 623 | 624 | # First check if the second quarter is the seed for a half-match. 625 | hm1 = diff_halfMatchI(longtext, shorttext, (len(longtext) + 3) // 4) 626 | # Check again based on the third quarter. 627 | hm2 = diff_halfMatchI(longtext, shorttext, (len(longtext) + 1) // 2) 628 | if not hm1 and not hm2: 629 | return None 630 | elif not hm2: 631 | hm = hm1 632 | elif not hm1: 633 | hm = hm2 634 | else: 635 | # Both matched. Select the longest. 636 | if len(hm1[4]) > len(hm2[4]): 637 | hm = hm1 638 | else: 639 | hm = hm2 640 | 641 | # A half-match was found, sort out the return data. 
642 | if len(text1) > len(text2): 643 | (text1_a, text1_b, text2_a, text2_b, mid_common) = hm 644 | else: 645 | (text2_a, text2_b, text1_a, text1_b, mid_common) = hm 646 | return (text1_a, text1_b, text2_a, text2_b, mid_common) 647 | 648 | def diff_cleanupSemantic(self, diffs): 649 | """Reduce the number of edits by eliminating semantically trivial 650 | equalities. 651 | 652 | Args: 653 | diffs: Array of diff tuples. 654 | """ 655 | changes = False 656 | equalities = [] # Stack of indices where equalities are found. 657 | lastequality = None # Always equal to diffs[equalities[-1]][1] 658 | pointer = 0 # Index of current position. 659 | # Number of chars that changed prior to the equality. 660 | length_insertions1, length_deletions1 = 0, 0 661 | # Number of chars that changed after the equality. 662 | length_insertions2, length_deletions2 = 0, 0 663 | while pointer < len(diffs): 664 | if diffs[pointer][0] == self.DIFF_EQUAL: # Equality found. 665 | equalities.append(pointer) 666 | length_insertions1, length_insertions2 = length_insertions2, 0 667 | length_deletions1, length_deletions2 = length_deletions2, 0 668 | lastequality = diffs[pointer][1] 669 | else: # An insertion or deletion. 670 | if diffs[pointer][0] == self.DIFF_INSERT: 671 | length_insertions2 += len(diffs[pointer][1]) 672 | else: 673 | length_deletions2 += len(diffs[pointer][1]) 674 | # Eliminate an equality that is smaller or equal to the edits on both 675 | # sides of it. 676 | if (lastequality and (len(lastequality) <= 677 | max(length_insertions1, length_deletions1)) and 678 | (len(lastequality) <= max(length_insertions2, length_deletions2))): 679 | # Duplicate record. 680 | diffs.insert(equalities[-1], (self.DIFF_DELETE, lastequality)) 681 | # Change second copy to insert. 682 | diffs[equalities[-1] + 1] = (self.DIFF_INSERT, diffs[equalities[-1] + 1][1]) 683 | # Throw away the equality we just deleted. 684 | equalities.pop() 685 | # Throw away the previous equality (it needs to be reevaluated). 
686 | if len(equalities): 687 | equalities.pop() 688 | if len(equalities): 689 | pointer = equalities[-1] 690 | else: 691 | pointer = -1 692 | # Reset the counters. 693 | length_insertions1, length_deletions1 = 0, 0 694 | length_insertions2, length_deletions2 = 0, 0 695 | lastequality = None 696 | changes = True 697 | pointer += 1 698 | 699 | # Normalize the diff. 700 | if changes: 701 | self.diff_cleanupMerge(diffs) 702 | self.diff_cleanupSemanticLossless(diffs) 703 | 704 | # Find any overlaps between deletions and insertions. 705 | # e.g: abcxxxxxxdef 706 | # -> abcxxxdef 707 | # e.g: xxxabcdefxxx 708 | # -> defxxxabc 709 | # Only extract an overlap if it is as big as the edit ahead or behind it. 710 | pointer = 1 711 | while pointer < len(diffs): 712 | if (diffs[pointer - 1][0] == self.DIFF_DELETE and 713 | diffs[pointer][0] == self.DIFF_INSERT): 714 | deletion = diffs[pointer - 1][1] 715 | insertion = diffs[pointer][1] 716 | overlap_length1 = self.diff_commonOverlap(deletion, insertion) 717 | overlap_length2 = self.diff_commonOverlap(insertion, deletion) 718 | if overlap_length1 >= overlap_length2: 719 | if (overlap_length1 >= len(deletion) / 2.0 or 720 | overlap_length1 >= len(insertion) / 2.0): 721 | # Overlap found. Insert an equality and trim the surrounding edits. 722 | diffs.insert(pointer, (self.DIFF_EQUAL, insertion[:overlap_length1])) 723 | diffs[pointer - 1] = (self.DIFF_DELETE, deletion[:len(deletion) - overlap_length1]) 724 | diffs[pointer + 1] = (self.DIFF_INSERT, insertion[overlap_length1:]) 725 | pointer += 1 726 | else: 727 | if (overlap_length2 >= len(deletion) / 2.0 or 728 | overlap_length2 >= len(insertion) / 2.0): 729 | # Reverse overlap found. 730 | # Insert an equality and swap and trim the surrounding edits. 
731 | diffs.insert(pointer, (self.DIFF_EQUAL, deletion[:overlap_length2])) 732 | diffs[pointer - 1] = (self.DIFF_INSERT, insertion[:len(insertion) - overlap_length2]) 733 | diffs[pointer + 1] = (self.DIFF_DELETE, deletion[overlap_length2:]) 734 | pointer += 1 735 | pointer += 1 736 | pointer += 1 737 | 738 | def diff_cleanupSemanticLossless(self, diffs): 739 | """Look for single edits surrounded on both sides by equalities 740 | which can be shifted sideways to align the edit to a word boundary. 741 | e.g: The cat came. -> The cat came. 742 | 743 | Args: 744 | diffs: Array of diff tuples. 745 | """ 746 | 747 | def diff_cleanupSemanticScore(one, two): 748 | """Given two strings, compute a score representing whether the 749 | internal boundary falls on logical boundaries. 750 | Scores range from 6 (best) to 0 (worst). 751 | Closure, but does not reference any external variables. 752 | 753 | Args: 754 | one: First string. 755 | two: Second string. 756 | 757 | Returns: 758 | The score. 759 | """ 760 | if not one or not two: 761 | # Edges are the best. 762 | return 6 763 | 764 | # Each port of this function behaves slightly differently due to 765 | # subtle differences in each language's definition of things like 766 | # 'whitespace'. Since this function's purpose is largely cosmetic, 767 | # the choice has been made to use each language's native features 768 | # rather than force total conformity. 
769 | char1 = one[-1] 770 | char2 = two[0] 771 | nonAlphaNumeric1 = not char1.isalnum() 772 | nonAlphaNumeric2 = not char2.isalnum() 773 | whitespace1 = nonAlphaNumeric1 and char1.isspace() 774 | whitespace2 = nonAlphaNumeric2 and char2.isspace() 775 | lineBreak1 = whitespace1 and (char1 == "\r" or char1 == "\n") 776 | lineBreak2 = whitespace2 and (char2 == "\r" or char2 == "\n") 777 | blankLine1 = lineBreak1 and self.BLANKLINEEND.search(one) 778 | blankLine2 = lineBreak2 and self.BLANKLINESTART.match(two) 779 | 780 | if blankLine1 or blankLine2: 781 | # Five points for blank lines. 782 | return 5 783 | elif lineBreak1 or lineBreak2: 784 | # Four points for line breaks. 785 | return 4 786 | elif nonAlphaNumeric1 and not whitespace1 and whitespace2: 787 | # Three points for end of sentences. 788 | return 3 789 | elif whitespace1 or whitespace2: 790 | # Two points for whitespace. 791 | return 2 792 | elif nonAlphaNumeric1 or nonAlphaNumeric2: 793 | # One point for non-alphanumeric. 794 | return 1 795 | return 0 796 | 797 | pointer = 1 798 | # Intentionally ignore the first and last element (don't need checking). 799 | while pointer < len(diffs) - 1: 800 | if (diffs[pointer - 1][0] == self.DIFF_EQUAL and 801 | diffs[pointer + 1][0] == self.DIFF_EQUAL): 802 | # This is a single edit surrounded by equalities. 803 | equality1 = diffs[pointer - 1][1] 804 | edit = diffs[pointer][1] 805 | equality2 = diffs[pointer + 1][1] 806 | 807 | # First, shift the edit as far left as possible. 808 | commonOffset = self.diff_commonSuffix(equality1, edit) 809 | if commonOffset: 810 | commonString = edit[-commonOffset:] 811 | equality1 = equality1[:-commonOffset] 812 | edit = commonString + edit[:-commonOffset] 813 | equality2 = commonString + equality2 814 | 815 | # Second, step character by character right, looking for the best fit. 
816 | bestEquality1 = equality1 817 | bestEdit = edit 818 | bestEquality2 = equality2 819 | bestScore = (diff_cleanupSemanticScore(equality1, edit) + 820 | diff_cleanupSemanticScore(edit, equality2)) 821 | while edit and equality2 and edit[0] == equality2[0]: 822 | equality1 += edit[0] 823 | edit = edit[1:] + equality2[0] 824 | equality2 = equality2[1:] 825 | score = (diff_cleanupSemanticScore(equality1, edit) + 826 | diff_cleanupSemanticScore(edit, equality2)) 827 | # The >= encourages trailing rather than leading whitespace on edits. 828 | if score >= bestScore: 829 | bestScore = score 830 | bestEquality1 = equality1 831 | bestEdit = edit 832 | bestEquality2 = equality2 833 | 834 | if diffs[pointer - 1][1] != bestEquality1: 835 | # We have an improvement, save it back to the diff. 836 | if bestEquality1: 837 | diffs[pointer - 1] = (diffs[pointer - 1][0], bestEquality1) 838 | else: 839 | del diffs[pointer - 1] 840 | pointer -= 1 841 | diffs[pointer] = (diffs[pointer][0], bestEdit) 842 | if bestEquality2: 843 | diffs[pointer + 1] = (diffs[pointer + 1][0], bestEquality2) 844 | else: 845 | del diffs[pointer + 1] 846 | pointer -= 1 847 | pointer += 1 848 | 849 | # Define some regex patterns for matching boundaries. 850 | BLANKLINEEND = re.compile(r"\n\r?\n$") 851 | BLANKLINESTART = re.compile(r"^\r?\n\r?\n") 852 | 853 | def diff_cleanupEfficiency(self, diffs): 854 | """Reduce the number of edits by eliminating operationally trivial 855 | equalities. 856 | 857 | Args: 858 | diffs: Array of diff tuples. 859 | """ 860 | changes = False 861 | equalities = [] # Stack of indices where equalities are found. 862 | lastequality = None # Always equal to diffs[equalities[-1]][1] 863 | pointer = 0 # Index of current position. 864 | pre_ins = False # Is there an insertion operation before the last equality. 865 | pre_del = False # Is there a deletion operation before the last equality. 866 | post_ins = False # Is there an insertion operation after the last equality. 
867 | post_del = False # Is there a deletion operation after the last equality. 868 | while pointer < len(diffs): 869 | if diffs[pointer][0] == self.DIFF_EQUAL: # Equality found. 870 | if (len(diffs[pointer][1]) < self.Diff_EditCost and 871 | (post_ins or post_del)): 872 | # Candidate found. 873 | equalities.append(pointer) 874 | pre_ins = post_ins 875 | pre_del = post_del 876 | lastequality = diffs[pointer][1] 877 | else: 878 | # Not a candidate, and can never become one. 879 | equalities = [] 880 | lastequality = None 881 | 882 | post_ins = post_del = False 883 | else: # An insertion or deletion. 884 | if diffs[pointer][0] == self.DIFF_DELETE: 885 | post_del = True 886 | else: 887 | post_ins = True 888 | 889 | # Five types to be split: 890 | # ABXYCD 891 | # AXCD 892 | # ABXC 893 | # AXCD 894 | # ABXC 895 | 896 | if lastequality and ((pre_ins and pre_del and post_ins and post_del) or 897 | ((len(lastequality) < self.Diff_EditCost / 2) and 898 | (pre_ins + pre_del + post_ins + post_del) == 3)): 899 | # Duplicate record. 900 | diffs.insert(equalities[-1], (self.DIFF_DELETE, lastequality)) 901 | # Change second copy to insert. 902 | diffs[equalities[-1] + 1] = (self.DIFF_INSERT, diffs[equalities[-1] + 1][1]) 903 | equalities.pop() # Throw away the equality we just deleted. 904 | lastequality = None 905 | if pre_ins and pre_del: 906 | # No changes made which could affect previous entry, keep going. 907 | post_ins = post_del = True 908 | equalities = [] 909 | else: 910 | if len(equalities): 911 | equalities.pop() # Throw away the previous equality. 912 | if len(equalities): 913 | pointer = equalities[-1] 914 | else: 915 | pointer = -1 916 | post_ins = post_del = False 917 | changes = True 918 | pointer += 1 919 | 920 | if changes: 921 | self.diff_cleanupMerge(diffs) 922 | 923 | def diff_cleanupMerge(self, diffs): 924 | """Reorder and merge like edit sections. Merge equalities. 925 | Any edit section can move as long as it doesn't cross an equality. 
926 | 927 | Args: 928 | diffs: Array of diff tuples. 929 | """ 930 | diffs.append((self.DIFF_EQUAL, '')) # Add a dummy entry at the end. 931 | pointer = 0 932 | count_delete = 0 933 | count_insert = 0 934 | text_delete = '' 935 | text_insert = '' 936 | while pointer < len(diffs): 937 | if diffs[pointer][0] == self.DIFF_INSERT: 938 | count_insert += 1 939 | text_insert += diffs[pointer][1] 940 | pointer += 1 941 | elif diffs[pointer][0] == self.DIFF_DELETE: 942 | count_delete += 1 943 | text_delete += diffs[pointer][1] 944 | pointer += 1 945 | elif diffs[pointer][0] == self.DIFF_EQUAL: 946 | # Upon reaching an equality, check for prior redundancies. 947 | if count_delete + count_insert > 1: 948 | if count_delete != 0 and count_insert != 0: 949 | # Factor out any common prefixies. 950 | commonlength = self.diff_commonPrefix(text_insert, text_delete) 951 | if commonlength != 0: 952 | x = pointer - count_delete - count_insert - 1 953 | if x >= 0 and diffs[x][0] == self.DIFF_EQUAL: 954 | diffs[x] = (diffs[x][0], diffs[x][1] + 955 | text_insert[:commonlength]) 956 | else: 957 | diffs.insert(0, (self.DIFF_EQUAL, text_insert[:commonlength])) 958 | pointer += 1 959 | text_insert = text_insert[commonlength:] 960 | text_delete = text_delete[commonlength:] 961 | # Factor out any common suffixies. 962 | commonlength = self.diff_commonSuffix(text_insert, text_delete) 963 | if commonlength != 0: 964 | diffs[pointer] = (diffs[pointer][0], text_insert[-commonlength:] + 965 | diffs[pointer][1]) 966 | text_insert = text_insert[:-commonlength] 967 | text_delete = text_delete[:-commonlength] 968 | # Delete the offending records and add the merged ones. 
969 | if count_delete == 0: 970 | diffs[(pointer - count_insert):pointer] = [(self.DIFF_INSERT, text_insert)] 971 | elif count_insert == 0: 972 | diffs[(pointer - count_delete):pointer] = [(self.DIFF_DELETE, text_delete)] 973 | else: 974 | diffs[(pointer - count_delete - count_insert):pointer] = [ 975 | (self.DIFF_DELETE, text_delete), 976 | (self.DIFF_INSERT, text_insert)] 977 | pointer = pointer - count_delete - count_insert + 1 978 | if count_delete != 0: 979 | pointer += 1 980 | if count_insert != 0: 981 | pointer += 1 982 | elif pointer != 0 and diffs[pointer - 1][0] == self.DIFF_EQUAL: 983 | # Merge this equality with the previous one. 984 | diffs[pointer - 1] = (diffs[pointer - 1][0], diffs[pointer - 1][1] + diffs[pointer][1]) 985 | del diffs[pointer] 986 | else: 987 | pointer += 1 988 | 989 | count_insert = 0 990 | count_delete = 0 991 | text_delete = '' 992 | text_insert = '' 993 | 994 | if diffs[-1][1] == '': 995 | diffs.pop() # Remove the dummy entry at the end. 996 | 997 | # Second pass: look for single edits surrounded on both sides by equalities 998 | # which can be shifted sideways to eliminate an equality. 999 | # e.g: ABAC -> ABAC 1000 | changes = False 1001 | pointer = 1 1002 | # Intentionally ignore the first and last element (don't need checking). 1003 | while pointer < len(diffs) - 1: 1004 | if (diffs[pointer - 1][0] == self.DIFF_EQUAL and 1005 | diffs[pointer + 1][0] == self.DIFF_EQUAL): 1006 | # This is a single edit surrounded by equalities. 1007 | if diffs[pointer][1].endswith(diffs[pointer - 1][1]): 1008 | # Shift the edit over the previous equality. 
1009 | diffs[pointer] = (diffs[pointer][0], 1010 | diffs[pointer - 1][1] + 1011 | diffs[pointer][1][:-len(diffs[pointer - 1][1])]) 1012 | diffs[pointer + 1] = (diffs[pointer + 1][0], 1013 | diffs[pointer - 1][1] + diffs[pointer + 1][1]) 1014 | del diffs[pointer - 1] 1015 | changes = True 1016 | elif diffs[pointer][1].startswith(diffs[pointer + 1][1]): 1017 | # Shift the edit over the next equality. 1018 | diffs[pointer - 1] = (diffs[pointer - 1][0], 1019 | diffs[pointer - 1][1] + diffs[pointer + 1][1]) 1020 | diffs[pointer] = (diffs[pointer][0], 1021 | diffs[pointer][1][len(diffs[pointer + 1][1]):] + 1022 | diffs[pointer + 1][1]) 1023 | del diffs[pointer + 1] 1024 | changes = True 1025 | pointer += 1 1026 | 1027 | # If shifts were made, the diff needs reordering and another shift sweep. 1028 | if changes: 1029 | self.diff_cleanupMerge(diffs) 1030 | 1031 | def diff_xIndex(self, diffs, loc): 1032 | """loc is a location in text1, compute and return the equivalent location 1033 | in text2. e.g. "The cat" vs "The big cat", 1->1, 5->8 1034 | 1035 | Args: 1036 | diffs: Array of diff tuples. 1037 | loc: Location within text1. 1038 | 1039 | Returns: 1040 | Location within text2. 1041 | """ 1042 | chars1 = 0 1043 | chars2 = 0 1044 | last_chars1 = 0 1045 | last_chars2 = 0 1046 | for x in range(len(diffs)): 1047 | (op, text) = diffs[x] 1048 | if op != self.DIFF_INSERT: # Equality or deletion. 1049 | chars1 += len(text) 1050 | if op != self.DIFF_DELETE: # Equality or insertion. 1051 | chars2 += len(text) 1052 | if chars1 > loc: # Overshot the location. 1053 | break 1054 | last_chars1 = chars1 1055 | last_chars2 = chars2 1056 | 1057 | if len(diffs) != x and diffs[x][0] == self.DIFF_DELETE: 1058 | # The location was deleted. 1059 | return last_chars2 1060 | # Add the remaining len(character). 1061 | return last_chars2 + (loc - last_chars1) 1062 | 1063 | def diff_prettyHtml(self, diffs): 1064 | """Convert a diff array into a pretty HTML report. 
1065 | 1066 | Args: 1067 | diffs: Array of diff tuples. 1068 | 1069 | Returns: 1070 | HTML representation. 1071 | """ 1072 | html = [] 1073 | for (op, data) in diffs: 1074 | text = (data.replace("&", "&").replace("<", "<") 1075 | .replace(">", ">").replace("\n", "¶
")) 1076 | if op == self.DIFF_INSERT: 1077 | html.append("%s" % text) 1078 | elif op == self.DIFF_DELETE: 1079 | html.append("%s" % text) 1080 | elif op == self.DIFF_EQUAL: 1081 | html.append("%s" % text) 1082 | return "".join(html) 1083 | 1084 | def diff_text1(self, diffs): 1085 | """Compute and return the source text (all equalities and deletions). 1086 | 1087 | Args: 1088 | diffs: Array of diff tuples. 1089 | 1090 | Returns: 1091 | Source text. 1092 | """ 1093 | text = [] 1094 | for (op, data) in diffs: 1095 | if op != self.DIFF_INSERT: 1096 | text.append(data) 1097 | return "".join(text) 1098 | 1099 | def diff_text2(self, diffs): 1100 | """Compute and return the destination text (all equalities and insertions). 1101 | 1102 | Args: 1103 | diffs: Array of diff tuples. 1104 | 1105 | Returns: 1106 | Destination text. 1107 | """ 1108 | text = [] 1109 | for (op, data) in diffs: 1110 | if op != self.DIFF_DELETE: 1111 | text.append(data) 1112 | return "".join(text) 1113 | 1114 | def diff_levenshtein(self, diffs): 1115 | """Compute the Levenshtein distance; the number of inserted, deleted or 1116 | substituted characters. 1117 | 1118 | Args: 1119 | diffs: Array of diff tuples. 1120 | 1121 | Returns: 1122 | Number of changes. 1123 | """ 1124 | levenshtein = 0 1125 | insertions = 0 1126 | deletions = 0 1127 | for (op, data) in diffs: 1128 | if op == self.DIFF_INSERT: 1129 | insertions += len(data) 1130 | elif op == self.DIFF_DELETE: 1131 | deletions += len(data) 1132 | elif op == self.DIFF_EQUAL: 1133 | # A deletion and an insertion is one substitution. 1134 | levenshtein += max(insertions, deletions) 1135 | insertions = 0 1136 | deletions = 0 1137 | levenshtein += max(insertions, deletions) 1138 | return levenshtein 1139 | 1140 | def diff_toDelta(self, diffs): 1141 | """Crush the diff into an encoded string which describes the operations 1142 | required to transform text1 into text2. 1143 | E.g. =3\t-2\t+ing -> Keep 3 chars, delete 2 chars, insert 'ing'. 
1144 | Operations are tab-separated. Inserted text is escaped using %xx notation. 1145 | 1146 | Args: 1147 | diffs: Array of diff tuples. 1148 | 1149 | Returns: 1150 | Delta text. 1151 | """ 1152 | text = [] 1153 | for (op, data) in diffs: 1154 | if op == self.DIFF_INSERT: 1155 | # High ascii will raise UnicodeDecodeError. Use Unicode instead. 1156 | data = data.encode("utf-8") 1157 | text.append("+" + parse.quote(data, "!~*'();/?:@&=+$,# ")) 1158 | elif op == self.DIFF_DELETE: 1159 | text.append("-%d" % len(data)) 1160 | elif op == self.DIFF_EQUAL: 1161 | text.append("=%d" % len(data)) 1162 | return "\t".join(text) 1163 | 1164 | def diff_fromDelta(self, text1, delta): 1165 | """Given the original text1, and an encoded string which describes the 1166 | operations required to transform text1 into text2, compute the full diff. 1167 | 1168 | Args: 1169 | text1: Source string for the diff. 1170 | delta: Delta text. 1171 | 1172 | Returns: 1173 | Array of diff tuples. 1174 | 1175 | Raises: 1176 | ValueError: If invalid input. 1177 | """ 1178 | if type(delta) == str: 1179 | # Deltas should be composed of a subset of ascii chars, Unicode not 1180 | # required. If this encode raises UnicodeEncodeError, delta is invalid. 1181 | delta.encode("ascii") 1182 | diffs = [] 1183 | pointer = 0 # Cursor in text1 1184 | tokens = delta.split("\t") 1185 | for token in tokens: 1186 | if token == "": 1187 | # Blank tokens are ok (from a trailing \t). 1188 | continue 1189 | # Each token begins with a one character parameter which specifies the 1190 | # operation of this token (delete, insert, equality). 
1191 | param = token[1:] 1192 | if token[0] == "+": 1193 | param = unquote(param) 1194 | diffs.append((self.DIFF_INSERT, param)) 1195 | elif token[0] == "-" or token[0] == "=": 1196 | try: 1197 | n = int(param) 1198 | except ValueError: 1199 | raise ValueError("Invalid number in diff_fromDelta: " + param) 1200 | if n < 0: 1201 | raise ValueError("Negative number in diff_fromDelta: " + param) 1202 | text = text1[pointer:(pointer + n)] 1203 | pointer += n 1204 | if token[0] == "=": 1205 | diffs.append((self.DIFF_EQUAL, text)) 1206 | else: 1207 | diffs.append((self.DIFF_DELETE, text)) 1208 | else: 1209 | # Anything else is an error. 1210 | raise ValueError("Invalid diff operation in diff_fromDelta: " + token[0]) 1211 | if pointer != len(text1): 1212 | raise ValueError( 1213 | "Delta length (%d) does not equal source text length (%d)." % 1214 | (pointer, len(text1))) 1215 | return diffs 1216 | 1217 | # MATCH FUNCTIONS 1218 | 1219 | def match_main(self, text, pattern, loc): 1220 | """Locate the best instance of 'pattern' in 'text' near 'loc'. 1221 | 1222 | Args: 1223 | text: The text to search. 1224 | pattern: The pattern to search for. 1225 | loc: The location to search around. 1226 | 1227 | Returns: 1228 | Best match index or -1. 1229 | """ 1230 | # Check for null inputs. 1231 | if text is None or pattern is None: 1232 | raise ValueError("Null inputs. (match_main)") 1233 | 1234 | loc = max(0, min(loc, len(text))) 1235 | if text == pattern: 1236 | # Shortcut (potentially not guaranteed by the algorithm) 1237 | return 0 1238 | elif not text: 1239 | # Nothing to match. 1240 | return -1 1241 | elif text[loc:loc + len(pattern)] == pattern: 1242 | # Perfect match at the perfect spot! (Includes case of null pattern) 1243 | return loc 1244 | else: 1245 | # Do a fuzzy compare. 
1246 | match = self.match_bitap(text, pattern, loc) 1247 | return match 1248 | 1249 | def match_bitap(self, text, pattern, loc): 1250 | """Locate the best instance of 'pattern' in 'text' near 'loc' using the 1251 | Bitap algorithm. 1252 | 1253 | Args: 1254 | text: The text to search. 1255 | pattern: The pattern to search for. 1256 | loc: The location to search around. 1257 | 1258 | Returns: 1259 | Best match index or -1. 1260 | """ 1261 | # Python doesn't have a maxint limit, so ignore this check. 1262 | # if self.Match_MaxBits != 0 and len(pattern) > self.Match_MaxBits: 1263 | # raise ValueError("Pattern too long for this application.") 1264 | 1265 | # Initialise the alphabet. 1266 | s = self.match_alphabet(pattern) 1267 | 1268 | def match_bitapScore(e, x): 1269 | """Compute and return the score for a match with e errors and x location. 1270 | Accesses loc and pattern through being a closure. 1271 | 1272 | Args: 1273 | e: Number of errors in match. 1274 | x: Location of match. 1275 | 1276 | Returns: 1277 | Overall score for match (0.0 = good, 1.0 = bad). 1278 | """ 1279 | accuracy = float(e) / len(pattern) 1280 | proximity = abs(loc - x) 1281 | if not self.Match_Distance: 1282 | # Dodge divide by zero error. 1283 | return proximity and 1.0 or accuracy 1284 | return accuracy + (proximity / float(self.Match_Distance)) 1285 | 1286 | # Highest score beyond which we give up. 1287 | score_threshold = self.Match_Threshold 1288 | # Is there a nearby exact match? (speedup) 1289 | best_loc = text.find(pattern, loc) 1290 | if best_loc != -1: 1291 | score_threshold = min(match_bitapScore(0, best_loc), score_threshold) 1292 | # What about in the other direction? (speedup) 1293 | best_loc = text.rfind(pattern, loc + len(pattern)) 1294 | if best_loc != -1: 1295 | score_threshold = min(match_bitapScore(0, best_loc), score_threshold) 1296 | 1297 | # Initialise the bit arrays. 
1298 | matchmask = 1 << (len(pattern) - 1) 1299 | best_loc = -1 1300 | 1301 | bin_max = len(pattern) + len(text) 1302 | # Empty initialization added to appease pychecker. 1303 | last_rd = None 1304 | for d in range(len(pattern)): 1305 | # Scan for the best match each iteration allows for one more error. 1306 | # Run a binary search to determine how far from 'loc' we can stray at 1307 | # this error level. 1308 | bin_min = 0 1309 | bin_mid = bin_max 1310 | while bin_min < bin_mid: 1311 | if match_bitapScore(d, loc + bin_mid) <= score_threshold: 1312 | bin_min = bin_mid 1313 | else: 1314 | bin_max = bin_mid 1315 | bin_mid = (bin_max - bin_min) // 2 + bin_min 1316 | 1317 | # Use the result from this iteration as the maximum for the next. 1318 | bin_max = bin_mid 1319 | start = max(1, loc - bin_mid + 1) 1320 | finish = min(loc + bin_mid, len(text)) + len(pattern) 1321 | 1322 | rd = [0] * (finish + 2) 1323 | rd[finish + 1] = (1 << d) - 1 1324 | for j in range(finish, start - 1, -1): 1325 | if len(text) <= j - 1: 1326 | # Out of range. 1327 | charMatch = 0 1328 | else: 1329 | charMatch = s.get(text[j - 1], 0) 1330 | if d == 0: # First pass: exact match. 1331 | rd[j] = ((rd[j + 1] << 1) | 1) & charMatch 1332 | else: # Subsequent passes: fuzzy match. 1333 | rd[j] = (((rd[j + 1] << 1) | 1) & charMatch) | ( 1334 | ((last_rd[j + 1] | last_rd[j]) << 1) | 1) | last_rd[j + 1] 1335 | if rd[j] & matchmask: 1336 | score = match_bitapScore(d, j - 1) 1337 | # This match will almost certainly be better than any existing match. 1338 | # But check anyway. 1339 | if score <= score_threshold: 1340 | # Told you so. 1341 | score_threshold = score 1342 | best_loc = j - 1 1343 | if best_loc > loc: 1344 | # When passing loc, don't exceed our current distance from loc. 1345 | start = max(1, 2 * loc - best_loc) 1346 | else: 1347 | # Already passed loc, downhill from here on in. 1348 | break 1349 | # No hope for a (better) match at greater error levels. 
1350 | if match_bitapScore(d + 1, loc) > score_threshold: 1351 | break 1352 | last_rd = rd 1353 | return best_loc 1354 | 1355 | def match_alphabet(self, pattern): 1356 | """Initialise the alphabet for the Bitap algorithm. 1357 | 1358 | Args: 1359 | pattern: The text to encode. 1360 | 1361 | Returns: 1362 | Hash of character locations. 1363 | """ 1364 | s = {} 1365 | for char in pattern: 1366 | s[char] = 0 1367 | for i in range(len(pattern)): 1368 | s[pattern[i]] |= 1 << (len(pattern) - i - 1) 1369 | return s 1370 | 1371 | # PATCH FUNCTIONS 1372 | 1373 | def patch_addContext(self, patch, text): 1374 | """Increase the context until it is unique, 1375 | but don't let the pattern expand beyond Match_MaxBits. 1376 | 1377 | Args: 1378 | patch: The patch to grow. 1379 | text: Source text. 1380 | """ 1381 | if len(text) == 0: 1382 | return 1383 | pattern = text[patch.start2:(patch.start2 + patch.length1)] 1384 | padding = 0 1385 | 1386 | # Look for the first and last matches of pattern in text. If two different 1387 | # matches are found, increase the pattern length. 1388 | while (text.find(pattern) != text.rfind(pattern) and 1389 | (self.Match_MaxBits == 0 or 1390 | len(pattern) < self.Match_MaxBits - self.Patch_Margin - 1391 | self.Patch_Margin)): 1392 | padding += self.Patch_Margin 1393 | pattern = text[max(0, patch.start2 - padding):(patch.start2 + patch.length1 + padding)] 1394 | # Add one chunk for good luck. 1395 | padding += self.Patch_Margin 1396 | 1397 | # Add the prefix. 1398 | prefix = text[max(0, patch.start2 - padding):patch.start2] 1399 | if prefix: 1400 | patch.diffs[:0] = [(self.DIFF_EQUAL, prefix)] 1401 | # Add the suffix. 1402 | suffix = text[(patch.start2 + patch.length1):(patch.start2 + patch.length1 + padding)] 1403 | if suffix: 1404 | patch.diffs.append((self.DIFF_EQUAL, suffix)) 1405 | 1406 | # Roll back the start points. 1407 | patch.start1 -= len(prefix) 1408 | patch.start2 -= len(prefix) 1409 | # Extend lengths. 
1410 | patch.length1 += len(prefix) + len(suffix) 1411 | patch.length2 += len(prefix) + len(suffix) 1412 | 1413 | def patch_make(self, a, b=None, c=None): 1414 | """Compute a list of patches to turn text1 into text2. 1415 | Use diffs if provided, otherwise compute it ourselves. 1416 | There are four ways to call this function, depending on what data is 1417 | available to the caller: 1418 | Method 1: 1419 | a = text1, b = text2 1420 | Method 2: 1421 | a = diffs 1422 | Method 3 (optimal): 1423 | a = text1, b = diffs 1424 | Method 4 (deprecated, use method 3): 1425 | a = text1, b = text2, c = diffs 1426 | 1427 | Args: 1428 | a: text1 (methods 1,3,4) or Array of diff tuples for text1 to 1429 | text2 (method 2). 1430 | b: text2 (methods 1,4) or Array of diff tuples for text1 to 1431 | text2 (method 3) or undefined (method 2). 1432 | c: Array of diff tuples for text1 to text2 (method 4) or 1433 | undefined (methods 1,2,3). 1434 | 1435 | Returns: 1436 | Array of Patch objects. 1437 | """ 1438 | text1 = None 1439 | diffs = None 1440 | # Note that texts may arrive as 'str' or 'unicode'. 1441 | if isinstance(a, str_instances) and isinstance(b, str_instances) and c is None: 1442 | # Method 1: text1, text2 1443 | # Compute diffs from text1 and text2. 1444 | text1 = a 1445 | diffs = self.diff_main(text1, b, True) 1446 | if len(diffs) > 2: 1447 | self.diff_cleanupSemantic(diffs) 1448 | self.diff_cleanupEfficiency(diffs) 1449 | elif isinstance(a, list) and b is None and c is None: 1450 | # Method 2: diffs 1451 | # Compute text1 from diffs. 1452 | diffs = a 1453 | text1 = self.diff_text1(diffs) 1454 | elif isinstance(a, str_instances) and isinstance(b, list) and c is None: 1455 | # Method 3: text1, diffs 1456 | text1 = a 1457 | diffs = b 1458 | elif (isinstance(a, str_instances) and isinstance(b, str_instances) and isinstance(c, list)): 1459 | # Method 4: text1, text2, diffs 1460 | # text2 is not used. 
1461 | text1 = a 1462 | diffs = c 1463 | else: 1464 | raise ValueError("Unknown call format to patch_make.") 1465 | 1466 | if not diffs: 1467 | return [] # Get rid of the None case. 1468 | patches = [] 1469 | patch = patch_obj() 1470 | char_count1 = 0 # Number of characters into the text1 string. 1471 | char_count2 = 0 # Number of characters into the text2 string. 1472 | prepatch_text = text1 # Recreate the patches to determine context info. 1473 | postpatch_text = text1 1474 | for x in range(len(diffs)): 1475 | (diff_type, diff_text) = diffs[x] 1476 | if len(patch.diffs) == 0 and diff_type != self.DIFF_EQUAL: 1477 | # A new patch starts here. 1478 | patch.start1 = char_count1 1479 | patch.start2 = char_count2 1480 | if diff_type == self.DIFF_INSERT: 1481 | # Insertion 1482 | patch.diffs.append(diffs[x]) 1483 | patch.length2 += len(diff_text) 1484 | postpatch_text = (postpatch_text[:char_count2] + diff_text + 1485 | postpatch_text[char_count2:]) 1486 | elif diff_type == self.DIFF_DELETE: 1487 | # Deletion. 1488 | patch.length1 += len(diff_text) 1489 | patch.diffs.append(diffs[x]) 1490 | postpatch_text = (postpatch_text[:char_count2] + 1491 | postpatch_text[char_count2 + len(diff_text):]) 1492 | elif (diff_type == self.DIFF_EQUAL and 1493 | len(diff_text) <= 2 * self.Patch_Margin and 1494 | len(patch.diffs) != 0 and len(diffs) != x + 1): 1495 | # Small equality inside a patch. 1496 | patch.diffs.append(diffs[x]) 1497 | patch.length1 += len(diff_text) 1498 | patch.length2 += len(diff_text) 1499 | 1500 | if (diff_type == self.DIFF_EQUAL and 1501 | len(diff_text) >= 2 * self.Patch_Margin): 1502 | # Time for a new patch. 1503 | if len(patch.diffs) != 0: 1504 | self.patch_addContext(patch, prepatch_text) 1505 | patches.append(patch) 1506 | patch = patch_obj() 1507 | # Unlike Unidiff, our patch lists have a rolling context. 
1508 | # http://code.google.com/p/google-diff-match-patch/wiki/Unidiff 1509 | # Update prepatch text & pos to reflect the application of the 1510 | # just completed patch. 1511 | prepatch_text = postpatch_text 1512 | char_count1 = char_count2 1513 | 1514 | # Update the current character count. 1515 | if diff_type != self.DIFF_INSERT: 1516 | char_count1 += len(diff_text) 1517 | if diff_type != self.DIFF_DELETE: 1518 | char_count2 += len(diff_text) 1519 | 1520 | # Pick up the leftover patch if not empty. 1521 | if len(patch.diffs) != 0: 1522 | self.patch_addContext(patch, prepatch_text) 1523 | patches.append(patch) 1524 | return patches 1525 | 1526 | def patch_deepCopy(self, patches): 1527 | """Given an array of patches, return another array that is identical. 1528 | 1529 | Args: 1530 | patches: Array of Patch objects. 1531 | 1532 | Returns: 1533 | Array of Patch objects. 1534 | """ 1535 | patchesCopy = [] 1536 | for patch in patches: 1537 | patchCopy = patch_obj() 1538 | # No need to deep copy the tuples since they are immutable. 1539 | patchCopy.diffs = patch.diffs[:] 1540 | patchCopy.start1 = patch.start1 1541 | patchCopy.start2 = patch.start2 1542 | patchCopy.length1 = patch.length1 1543 | patchCopy.length2 = patch.length2 1544 | patchesCopy.append(patchCopy) 1545 | return patchesCopy 1546 | 1547 | def patch_apply(self, patches, text): 1548 | """Merge a set of patches onto the text. Return a patched text, as well 1549 | as a list of true/false values indicating which patches were applied. 1550 | 1551 | Args: 1552 | patches: Array of Patch objects. 1553 | text: Old text. 1554 | 1555 | Returns: 1556 | Two element Array, containing the new text and an array of boolean values. 1557 | """ 1558 | if not patches: 1559 | return (text, []) 1560 | 1561 | # Deep copy the patches so that no changes are made to originals. 
1562 | patches = self.patch_deepCopy(patches) 1563 | 1564 | nullPadding = self.patch_addPadding(patches) 1565 | text = nullPadding + text + nullPadding 1566 | self.patch_splitMax(patches) 1567 | 1568 | # delta keeps track of the offset between the expected and actual location 1569 | # of the previous patch. If there are patches expected at positions 10 and 1570 | # 20, but the first patch was found at 12, delta is 2 and the second patch 1571 | # has an effective expected position of 22. 1572 | delta = 0 1573 | results = [] 1574 | for patch in patches: 1575 | expected_loc = patch.start2 + delta 1576 | text1 = self.diff_text1(patch.diffs) 1577 | end_loc = -1 1578 | if len(text1) > self.Match_MaxBits: 1579 | # patch_splitMax will only provide an oversized pattern in the case of 1580 | # a monster delete. 1581 | start_loc = self.match_main(text, text1[:self.Match_MaxBits], expected_loc) 1582 | if start_loc != -1: 1583 | end_loc = self.match_main(text, text1[-self.Match_MaxBits:], 1584 | expected_loc + len(text1) - self.Match_MaxBits) 1585 | if end_loc == -1 or start_loc >= end_loc: 1586 | # Can't find valid trailing context. Drop this patch. 1587 | start_loc = -1 1588 | else: 1589 | start_loc = self.match_main(text, text1, expected_loc) 1590 | if start_loc == -1: 1591 | # No match found. :( 1592 | results.append(False) 1593 | # Subtract the delta for this failed patch from subsequent patches. 1594 | delta -= patch.length2 - patch.length1 1595 | else: 1596 | # Found a match. :) 1597 | results.append(True) 1598 | delta = start_loc - expected_loc 1599 | if end_loc == -1: 1600 | text2 = text[start_loc:(start_loc + len(text1))] 1601 | else: 1602 | text2 = text[start_loc:(end_loc + self.Match_MaxBits)] 1603 | if text1 == text2: 1604 | # Perfect match, just shove the replacement text in. 1605 | text = (text[:start_loc] + self.diff_text2(patch.diffs) + 1606 | text[start_loc + len(text1):]) 1607 | else: 1608 | # Imperfect match. 
1609 | # Run a diff to get a framework of equivalent indices. 1610 | diffs = self.diff_main(text1, text2, False) 1611 | if (len(text1) > self.Match_MaxBits and 1612 | self.diff_levenshtein(diffs) / float(len(text1)) > 1613 | self.Patch_DeleteThreshold): 1614 | # The end points match, but the content is unacceptably bad. 1615 | results[-1] = False 1616 | else: 1617 | self.diff_cleanupSemanticLossless(diffs) 1618 | index1 = 0 1619 | for (op, data) in patch.diffs: 1620 | if op != self.DIFF_EQUAL: 1621 | index2 = self.diff_xIndex(diffs, index1) 1622 | if op == self.DIFF_INSERT: # Insertion 1623 | text = text[:start_loc + index2] + data + text[start_loc + index2:] 1624 | elif op == self.DIFF_DELETE: # Deletion 1625 | text = text[:start_loc + index2] + text[start_loc + self.diff_xIndex(diffs, index1 + len(data)):] 1626 | if op != self.DIFF_DELETE: 1627 | index1 += len(data) 1628 | # Strip the padding off. 1629 | text = text[len(nullPadding):-len(nullPadding)] 1630 | return (text, results) 1631 | 1632 | def patch_addPadding(self, patches): 1633 | """Add some padding on text start and end so that edges can match 1634 | something. Intended to be called only from within patch_apply. 1635 | 1636 | Args: 1637 | patches: Array of Patch objects. 1638 | 1639 | Returns: 1640 | The padding string added to each side. 1641 | """ 1642 | paddingLength = self.Patch_Margin 1643 | nullPadding = "" 1644 | for x in range(1, paddingLength + 1): 1645 | nullPadding += unichr(x) 1646 | 1647 | # Bump all the patches forward. 1648 | for patch in patches: 1649 | patch.start1 += paddingLength 1650 | patch.start2 += paddingLength 1651 | 1652 | # Add some padding on start of first diff. 1653 | patch = patches[0] 1654 | diffs = patch.diffs 1655 | if not diffs or diffs[0][0] != self.DIFF_EQUAL: 1656 | # Add nullPadding equality. 1657 | diffs.insert(0, (self.DIFF_EQUAL, nullPadding)) 1658 | patch.start1 -= paddingLength # Should be 0. 1659 | patch.start2 -= paddingLength # Should be 0. 
1660 | patch.length1 += paddingLength 1661 | patch.length2 += paddingLength 1662 | elif paddingLength > len(diffs[0][1]): 1663 | # Grow first equality. 1664 | extraLength = paddingLength - len(diffs[0][1]) 1665 | newText = nullPadding[len(diffs[0][1]):] + diffs[0][1] 1666 | diffs[0] = (diffs[0][0], newText) 1667 | patch.start1 -= extraLength 1668 | patch.start2 -= extraLength 1669 | patch.length1 += extraLength 1670 | patch.length2 += extraLength 1671 | 1672 | # Add some padding on end of last diff. 1673 | patch = patches[-1] 1674 | diffs = patch.diffs 1675 | if not diffs or diffs[-1][0] != self.DIFF_EQUAL: 1676 | # Add nullPadding equality. 1677 | diffs.append((self.DIFF_EQUAL, nullPadding)) 1678 | patch.length1 += paddingLength 1679 | patch.length2 += paddingLength 1680 | elif paddingLength > len(diffs[-1][1]): 1681 | # Grow last equality. 1682 | extraLength = paddingLength - len(diffs[-1][1]) 1683 | newText = diffs[-1][1] + nullPadding[:extraLength] 1684 | diffs[-1] = (diffs[-1][0], newText) 1685 | patch.length1 += extraLength 1686 | patch.length2 += extraLength 1687 | 1688 | return nullPadding 1689 | 1690 | def patch_splitMax(self, patches): 1691 | """Look through the patches and break up any which are longer than the 1692 | maximum limit of the match algorithm. 1693 | Intended to be called only from within patch_apply. 1694 | 1695 | Args: 1696 | patches: Array of Patch objects. 1697 | """ 1698 | patch_size = self.Match_MaxBits 1699 | if patch_size == 0: 1700 | # Python has the option of not splitting strings due to its ability 1701 | # to handle integers of arbitrary precision. 1702 | return 1703 | for x in range(len(patches)): 1704 | if patches[x].length1 <= patch_size: 1705 | continue 1706 | bigpatch = patches[x] 1707 | # Remove the big old patch. 1708 | del patches[x] 1709 | x -= 1 1710 | start1 = bigpatch.start1 1711 | start2 = bigpatch.start2 1712 | precontext = '' 1713 | while len(bigpatch.diffs) != 0: 1714 | # Create one of several smaller patches. 
1715 | patch = patch_obj() 1716 | empty = True 1717 | patch.start1 = start1 - len(precontext) 1718 | patch.start2 = start2 - len(precontext) 1719 | if precontext: 1720 | patch.length1 = patch.length2 = len(precontext) 1721 | patch.diffs.append((self.DIFF_EQUAL, precontext)) 1722 | 1723 | while (len(bigpatch.diffs) != 0 and 1724 | patch.length1 < patch_size - self.Patch_Margin): 1725 | (diff_type, diff_text) = bigpatch.diffs[0] 1726 | if diff_type == self.DIFF_INSERT: 1727 | # Insertions are harmless. 1728 | patch.length2 += len(diff_text) 1729 | start2 += len(diff_text) 1730 | patch.diffs.append(bigpatch.diffs.pop(0)) 1731 | empty = False 1732 | elif (diff_type == self.DIFF_DELETE and len(patch.diffs) == 1 and 1733 | patch.diffs[0][0] == self.DIFF_EQUAL and 1734 | len(diff_text) > 2 * patch_size): 1735 | # This is a large deletion. Let it pass in one chunk. 1736 | patch.length1 += len(diff_text) 1737 | start1 += len(diff_text) 1738 | empty = False 1739 | patch.diffs.append((diff_type, diff_text)) 1740 | del bigpatch.diffs[0] 1741 | else: 1742 | # Deletion or equality. Only take as much as we can stomach. 1743 | diff_text = diff_text[:patch_size - patch.length1 - self.Patch_Margin] 1744 | patch.length1 += len(diff_text) 1745 | start1 += len(diff_text) 1746 | if diff_type == self.DIFF_EQUAL: 1747 | patch.length2 += len(diff_text) 1748 | start2 += len(diff_text) 1749 | else: 1750 | empty = False 1751 | 1752 | patch.diffs.append((diff_type, diff_text)) 1753 | if diff_text == bigpatch.diffs[0][1]: 1754 | del bigpatch.diffs[0] 1755 | else: 1756 | bigpatch.diffs[0] = (bigpatch.diffs[0][0], bigpatch.diffs[0][1][len(diff_text):]) 1757 | 1758 | # Compute the head context for the next patch. 1759 | precontext = self.diff_text2(patch.diffs) 1760 | precontext = precontext[-self.Patch_Margin:] 1761 | # Append the end context for this patch. 
1762 | postcontext = self.diff_text1(bigpatch.diffs)[:self.Patch_Margin] 1763 | if postcontext: 1764 | patch.length1 += len(postcontext) 1765 | patch.length2 += len(postcontext) 1766 | if len(patch.diffs) != 0 and patch.diffs[-1][0] == self.DIFF_EQUAL: 1767 | patch.diffs[-1] = (self.DIFF_EQUAL, patch.diffs[-1][1] + postcontext) 1768 | else: 1769 | patch.diffs.append((self.DIFF_EQUAL, postcontext)) 1770 | 1771 | if not empty: 1772 | x += 1 1773 | patches.insert(x, patch) 1774 | 1775 | def patch_toText(self, patches): 1776 | """Take a list of patches and return a textual representation. 1777 | 1778 | Args: 1779 | patches: Array of Patch objects. 1780 | 1781 | Returns: 1782 | Text representation of patches. 1783 | """ 1784 | text = [] 1785 | for patch in patches: 1786 | text.append(str(patch)) 1787 | return "".join(text) 1788 | 1789 | def patch_fromText(self, textline): 1790 | """Parse a textual representation of patches and return a list of patch 1791 | objects. 1792 | 1793 | Args: 1794 | textline: Text representation of patches. 1795 | 1796 | Returns: 1797 | Array of Patch objects. 1798 | 1799 | Raises: 1800 | ValueError: If invalid input. 
1801 | """ 1802 | patches = [] 1803 | if not textline: 1804 | return patches 1805 | text = textline.split('\n') 1806 | while len(text) != 0: 1807 | m = re.match("^@@ -(\d+),?(\d*) \+(\d+),?(\d*) @@$", text[0]) 1808 | if not m: 1809 | raise ValueError("Invalid patch string: " + text[0]) 1810 | patch = patch_obj() 1811 | patches.append(patch) 1812 | patch.start1 = int(m.group(1)) 1813 | if m.group(2) == '': 1814 | patch.start1 -= 1 1815 | patch.length1 = 1 1816 | elif m.group(2) == '0': 1817 | patch.length1 = 0 1818 | else: 1819 | patch.start1 -= 1 1820 | patch.length1 = int(m.group(2)) 1821 | 1822 | patch.start2 = int(m.group(3)) 1823 | if m.group(4) == '': 1824 | patch.start2 -= 1 1825 | patch.length2 = 1 1826 | elif m.group(4) == '0': 1827 | patch.length2 = 0 1828 | else: 1829 | patch.start2 -= 1 1830 | patch.length2 = int(m.group(4)) 1831 | 1832 | del text[0] 1833 | 1834 | while len(text) != 0: 1835 | if text[0]: 1836 | sign = text[0][0] 1837 | else: 1838 | sign = '' 1839 | line = unquote(text[0][1:]) 1840 | if sign == '+': 1841 | # Insertion. 1842 | patch.diffs.append((self.DIFF_INSERT, line)) 1843 | elif sign == '-': 1844 | # Deletion. 1845 | patch.diffs.append((self.DIFF_DELETE, line)) 1846 | elif sign == ' ': 1847 | # Minor equality. 1848 | patch.diffs.append((self.DIFF_EQUAL, line)) 1849 | elif sign == '@': 1850 | # Start of next patch. 1851 | break 1852 | elif sign == '': 1853 | # Blank line? Whatever. 1854 | pass 1855 | else: 1856 | # WTF? 1857 | raise ValueError("Invalid patch mode: '%s'\n%s" % (sign, line)) 1858 | del text[0] 1859 | return patches 1860 | 1861 | 1862 | class patch_obj: 1863 | """Class representing one patch operation. 1864 | """ 1865 | 1866 | def __init__(self): 1867 | """Initializes with an empty list of diffs. 1868 | """ 1869 | self.diffs = [] 1870 | self.start1 = None 1871 | self.start2 = None 1872 | self.length1 = 0 1873 | self.length2 = 0 1874 | 1875 | def __str__(self): 1876 | """Emmulate GNU diff's format. 
1877 | Header: @@ -382,8 +481,9 @@ 1878 | Indicies are printed as 1-based, not 0-based. 1879 | 1880 | Returns: 1881 | The GNU diff string. 1882 | """ 1883 | if self.length1 == 0: 1884 | coords1 = str(self.start1) + ",0" 1885 | elif self.length1 == 1: 1886 | coords1 = str(self.start1 + 1) 1887 | else: 1888 | coords1 = str(self.start1 + 1) + "," + str(self.length1) 1889 | if self.length2 == 0: 1890 | coords2 = str(self.start2) + ",0" 1891 | elif self.length2 == 1: 1892 | coords2 = str(self.start2 + 1) 1893 | else: 1894 | coords2 = str(self.start2 + 1) + "," + str(self.length2) 1895 | text = ["@@ -", coords1, " +", coords2, " @@\n"] 1896 | # Escape the body of the patch with %xx notation. 1897 | for (op, data) in self.diffs: 1898 | if op == diff_match_patch.DIFF_INSERT: 1899 | text.append("+") 1900 | elif op == diff_match_patch.DIFF_DELETE: 1901 | text.append("-") 1902 | elif op == diff_match_patch.DIFF_EQUAL: 1903 | text.append(" ") 1904 | # High ascii will raise UnicodeDecodeError. Use Unicode instead. 1905 | data = data.encode("utf-8") 1906 | text.append(parse.quote(data, "!~*'();/?:@&=+$,# ") + "\n") 1907 | return "".join(text) 1908 | --------------------------------------------------------------------------------