diff --git a/README.md b/README.md
index 8fbde2e8..6a40229b 100644
--- a/README.md
+++ b/README.md
@@ -34,7 +34,7 @@ Medacy can be installed for general use or for pipeline development / research p
 After installing medaCy and [medaCy's clinical model](examples/models/clinical_notes_model.md), simply run:
 
 ```python
-from medacy.model import Model
+from medacy.ner.model import Model
 
 model = Model.load_external('medacy_model_clinical_notes')
 annotation = model.predict("The patient was prescribed 1 capsule of Advil for 5 days.")
diff --git a/docs/source/medacy.model.feature_extractor.rst b/docs/source/medacy.model.feature_extractor.rst
deleted file mode 100644
index 013a99c7..00000000
--- a/docs/source/medacy.model.feature_extractor.rst
+++ /dev/null
@@ -1,7 +0,0 @@
-medacy.model.feature\_extractor module
-======================================
-
-.. automodule:: medacy.model.feature_extractor
-    :members:
-    :undoc-members:
-    :show-inheritance:
diff --git a/docs/source/medacy.model.rst b/docs/source/medacy.model.rst
deleted file mode 100644
index 0695ea40..00000000
--- a/docs/source/medacy.model.rst
+++ /dev/null
@@ -1,8 +0,0 @@
-medacy.model package
-====================
-
-.. toctree::
-
-    medacy.model.feature_extractor
-    medacy.model.model
-    medacy.model.stratified_k_fold
diff --git a/docs/source/medacy.model.model.rst b/docs/source/medacy.ner.model.model.rst
similarity index 54%
rename from docs/source/medacy.model.model.rst
rename to docs/source/medacy.ner.model.model.rst
index 0eddf1fb..e5b4fc06 100644
--- a/docs/source/medacy.model.model.rst
+++ b/docs/source/medacy.ner.model.model.rst
@@ -1,7 +1,7 @@
-medacy.model.model module
+medacy.ner.model.model module
 =========================
 
-.. automodule:: medacy.model.model
+.. automodule:: medacy.ner.model.model
     :members:
     :undoc-members:
     :show-inheritance:
diff --git a/docs/source/medacy.ner.model.rst b/docs/source/medacy.ner.model.rst
new file mode 100644
index 00000000..1a48d393
--- /dev/null
+++ b/docs/source/medacy.ner.model.rst
@@ -0,0 +1,7 @@
+medacy.ner.model package
+========================
+
+.. toctree::
+
+    medacy.ner.model.model
+    medacy.ner.model.stratified_k_fold
\ No newline at end of file
diff --git a/docs/source/medacy.model.stratified_k_fold.rst b/docs/source/medacy.ner.model.stratified_k_fold.rst
similarity index 50%
rename from docs/source/medacy.model.stratified_k_fold.rst
rename to docs/source/medacy.ner.model.stratified_k_fold.rst
index cc3213cf..4f1cb965 100644
--- a/docs/source/medacy.model.stratified_k_fold.rst
+++ b/docs/source/medacy.ner.model.stratified_k_fold.rst
@@ -1,7 +1,7 @@
-medacy.model.stratified\_k\_fold module
+medacy.ner.model.stratified\_k\_fold module
 =======================================
 
-.. automodule:: medacy.model.stratified_k_fold
+.. automodule:: medacy.ner.model.stratified_k_fold
     :members:
     :undoc-members:
     :show-inheritance:
diff --git a/docs/source/medacy.ner.pipelines.base.base_pipeline.rst b/docs/source/medacy.ner.pipelines.base.base_pipeline.rst
new file mode 100644
index 00000000..4d487e7e
--- /dev/null
+++ b/docs/source/medacy.ner.pipelines.base.base_pipeline.rst
@@ -0,0 +1,7 @@
+medacy.ner.pipelines.base.base\_pipeline module
+===============================================
+
+.. automodule:: medacy.ner.pipelines.base.base_pipeline
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/docs/source/medacy.ner.pipelines.base.rst b/docs/source/medacy.ner.pipelines.base.rst
new file mode 100644
index 00000000..7e073844
--- /dev/null
+++ b/docs/source/medacy.ner.pipelines.base.rst
@@ -0,0 +1,6 @@
+medacy.ner.pipelines.base package
+=================================
+
+.. toctree::
+
+    medacy.ner.pipelines.base.base_pipeline
diff --git a/docs/source/medacy.ner.pipelines.clinical_pipeline.rst b/docs/source/medacy.ner.pipelines.clinical_pipeline.rst
new file mode 100644
index 00000000..c6a766c2
--- /dev/null
+++ b/docs/source/medacy.ner.pipelines.clinical_pipeline.rst
@@ -0,0 +1,7 @@
+medacy.ner.pipelines.clinical\_pipeline module
+==============================================
+
+.. automodule:: medacy.ner.pipelines.clinical_pipeline
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/docs/source/medacy.ner.pipelines.drug_event_pipeline.rst b/docs/source/medacy.ner.pipelines.drug_event_pipeline.rst
new file mode 100644
index 00000000..4d26fd29
--- /dev/null
+++ b/docs/source/medacy.ner.pipelines.drug_event_pipeline.rst
@@ -0,0 +1,7 @@
+medacy.ner.pipelines.drug\_event\_pipeline module
+=================================================
+
+.. automodule:: medacy.ner.pipelines.drug_event_pipeline
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/docs/source/medacy.ner.pipelines.fda_nano_drug_label_pipeline.rst b/docs/source/medacy.ner.pipelines.fda_nano_drug_label_pipeline.rst
new file mode 100644
index 00000000..13a74898
--- /dev/null
+++ b/docs/source/medacy.ner.pipelines.fda_nano_drug_label_pipeline.rst
@@ -0,0 +1,7 @@
+medacy.ner.pipelines.fda\_nano\_drug\_label\_pipeline module
+============================================================
+
+.. automodule:: medacy.ner.pipelines.fda_nano_drug_label_pipeline
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/docs/source/medacy.ner.pipelines.rst b/docs/source/medacy.ner.pipelines.rst
new file mode 100644
index 00000000..ff8eacc2
--- /dev/null
+++ b/docs/source/medacy.ner.pipelines.rst
@@ -0,0 +1,11 @@
+medacy.ner.pipelines package
+============================
+
+.. toctree::
+
+    medacy.ner.pipelines.base
+    medacy.ner.pipelines.clinical_pipeline
+    medacy.ner.pipelines.drug_event_pipeline
+    medacy.ner.pipelines.fda_nano_drug_label_pipeline
+    medacy.ner.pipelines.systematic_review_pipeline
+    medacy.ner.pipelines.testing_pipeline
diff --git a/docs/source/medacy.ner.pipelines.systematic_review_pipeline.rst b/docs/source/medacy.ner.pipelines.systematic_review_pipeline.rst
new file mode 100644
index 00000000..ebf76cd0
--- /dev/null
+++ b/docs/source/medacy.ner.pipelines.systematic_review_pipeline.rst
@@ -0,0 +1,7 @@
+medacy.ner.pipelines.systematic\_review\_pipeline module
+========================================================
+
+.. automodule:: medacy.ner.pipelines.systematic_review_pipeline
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/docs/source/medacy.pipelines.testing_pipeline.rst b/docs/source/medacy.ner.pipelines.testing_pipeline.rst
similarity index 50%
rename from docs/source/medacy.pipelines.testing_pipeline.rst
rename to docs/source/medacy.ner.pipelines.testing_pipeline.rst
index 5e40839b..4038af9d 100644
--- a/docs/source/medacy.pipelines.testing_pipeline.rst
+++ b/docs/source/medacy.ner.pipelines.testing_pipeline.rst
@@ -1,7 +1,7 @@
-medacy.pipelines.testing\_pipeline module
+medacy.ner.pipelines.testing\_pipeline module
 =========================================
 
-.. automodule:: medacy.pipelines.testing_pipeline
+.. automodule:: medacy.ner.pipelines.testing_pipeline
     :members:
     :undoc-members:
     :show-inheritance:
diff --git a/docs/source/medacy.ner.rst b/docs/source/medacy.ner.rst
new file mode 100644
index 00000000..727b4ec3
--- /dev/null
+++ b/docs/source/medacy.ner.rst
@@ -0,0 +1,7 @@
+medacy.ner package
+====================
+
+.. toctree::
+
+    medacy.ner.model
+    medacy.ner.pipelines
diff --git a/docs/source/medacy.pipeline_components.feature_extraction.discrete_feature_extractor.rst b/docs/source/medacy.pipeline_components.feature_extraction.discrete_feature_extractor.rst
new file mode 100644
index 00000000..fd39e43f
--- /dev/null
+++ b/docs/source/medacy.pipeline_components.feature_extraction.discrete_feature_extractor.rst
@@ -0,0 +1,7 @@
+medacy.pipeline\_components.feature\_extraction.discrete\_feature\_extractor module
+===================================================================================
+
+.. automodule:: medacy.pipeline_components.feature_extraction.discrete_feature_extractor
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/docs/source/medacy.pipeline_components.feature_extraction.rst b/docs/source/medacy.pipeline_components.feature_extraction.rst
new file mode 100644
index 00000000..ac91fb07
--- /dev/null
+++ b/docs/source/medacy.pipeline_components.feature_extraction.rst
@@ -0,0 +1,6 @@
+medacy.pipeline\_components.feature\_extraction package
+=======================================================
+
+.. toctree::
+
+    medacy.pipeline_components.feature_extraction.discrete_feature_extractor
diff --git a/docs/source/medacy.pipeline_components.rst b/docs/source/medacy.pipeline_components.rst
index 8ec5b41a..8ba68141 100644
--- a/docs/source/medacy.pipeline_components.rst
+++ b/docs/source/medacy.pipeline_components.rst
@@ -9,3 +9,4 @@ medacy.pipeline\_components package
     medacy.pipeline_components.metamap
     medacy.pipeline_components.tokenization
     medacy.pipeline_components.units
+    medacy.pipeline_components.feature_extraction
diff --git a/docs/source/medacy.pipelines.base.base_pipeline.rst b/docs/source/medacy.pipelines.base.base_pipeline.rst
deleted file mode 100644
index e6e6cbe3..00000000
--- a/docs/source/medacy.pipelines.base.base_pipeline.rst
+++ /dev/null
@@ -1,7 +0,0 @@
-medacy.pipelines.base.base\_pipeline module
-===========================================
-
-.. automodule:: medacy.pipelines.base.base_pipeline
-    :members:
-    :undoc-members:
-    :show-inheritance:
diff --git a/docs/source/medacy.pipelines.base.rst b/docs/source/medacy.pipelines.base.rst
deleted file mode 100644
index 498205b5..00000000
--- a/docs/source/medacy.pipelines.base.rst
+++ /dev/null
@@ -1,6 +0,0 @@
-medacy.pipelines.base package
-=============================
-
-.. toctree::
-
-    medacy.pipelines.base.base_pipeline
diff --git a/docs/source/medacy.pipelines.clinical_pipeline.rst b/docs/source/medacy.pipelines.clinical_pipeline.rst
deleted file mode 100644
index d168800f..00000000
--- a/docs/source/medacy.pipelines.clinical_pipeline.rst
+++ /dev/null
@@ -1,7 +0,0 @@
-medacy.pipelines.clinical\_pipeline module
-==========================================
-
-.. automodule:: medacy.pipelines.clinical_pipeline
-    :members:
-    :undoc-members:
-    :show-inheritance:
diff --git a/docs/source/medacy.pipelines.drug_event_pipeline.rst b/docs/source/medacy.pipelines.drug_event_pipeline.rst
deleted file mode 100644
index 3dedd7ec..00000000
--- a/docs/source/medacy.pipelines.drug_event_pipeline.rst
+++ /dev/null
@@ -1,7 +0,0 @@
-medacy.pipelines.drug\_event\_pipeline module
-=============================================
-
-.. automodule:: medacy.pipelines.drug_event_pipeline
-    :members:
-    :undoc-members:
-    :show-inheritance:
diff --git a/docs/source/medacy.pipelines.fda_nano_drug_label_pipeline.rst b/docs/source/medacy.pipelines.fda_nano_drug_label_pipeline.rst
deleted file mode 100644
index e5adca71..00000000
--- a/docs/source/medacy.pipelines.fda_nano_drug_label_pipeline.rst
+++ /dev/null
@@ -1,7 +0,0 @@
-medacy.pipelines.fda\_nano\_drug\_label\_pipeline module
-========================================================
-
-.. automodule:: medacy.pipelines.fda_nano_drug_label_pipeline
-    :members:
-    :undoc-members:
-    :show-inheritance:
diff --git a/docs/source/medacy.pipelines.rst b/docs/source/medacy.pipelines.rst
deleted file mode 100644
index 75e56cb6..00000000
--- a/docs/source/medacy.pipelines.rst
+++ /dev/null
@@ -1,11 +0,0 @@
-medacy.pipelines package
-========================
-
-.. toctree::
-
-    medacy.pipelines.base
-    medacy.pipelines.clinical_pipeline
-    medacy.pipelines.drug_event_pipeline
-    medacy.pipelines.fda_nano_drug_label_pipeline
-    medacy.pipelines.systematic_review_pipeline
-    medacy.pipelines.testing_pipeline
diff --git a/docs/source/medacy.pipelines.systematic_review_pipeline.rst b/docs/source/medacy.pipelines.systematic_review_pipeline.rst
deleted file mode 100644
index 98eec876..00000000
--- a/docs/source/medacy.pipelines.systematic_review_pipeline.rst
+++ /dev/null
@@ -1,7 +0,0 @@
-medacy.pipelines.systematic\_review\_pipeline module
-====================================================
-
-.. automodule:: medacy.pipelines.systematic_review_pipeline
-    :members:
-    :undoc-members:
-    :show-inheritance:
diff --git a/docs/source/medacy.relation.rst b/docs/source/medacy.relation.rst
new file mode 100644
index 00000000..a482f355
--- /dev/null
+++ b/docs/source/medacy.relation.rst
@@ -0,0 +1,4 @@
+medacy.relation package
+=======================
+
+.. toctree::
diff --git a/docs/source/medacy.rst b/docs/source/medacy.rst
index 954db934..00d2068a 100644
--- a/docs/source/medacy.rst
+++ b/docs/source/medacy.rst
@@ -4,7 +4,7 @@ medacy package
 .. toctree::
 
     medacy.data
-    medacy.model
+    medacy.ner
+    medacy.relation
     medacy.pipeline_components
-    medacy.pipelines
     medacy.tools
diff --git a/examples/guide/data_management.md b/examples/guide/data_management.md
index 943c58c0..429073a5 100644
--- a/examples/guide/data_management.md
+++ b/examples/guide/data_management.md
@@ -157,7 +157,7 @@ Once you have a trained or imported a model, pass in a Dataset object for bulk p
 
 ```python
 from medacy.data import Dataset
-from medacy.model import Model
+from medacy.ner.model import Model
 
 dataset = Dataset('/home/medacy/data')
 model = Model.load_external('medacy_model_clinical_notes')
diff --git a/examples/guide/model_training.md b/examples/guide/model_training.md
index b5460ad6..8e3ccc22 100644
--- a/examples/guide/model_training.md
+++ b/examples/guide/model_training.md
@@ -66,7 +66,7 @@ The previously mentioned components make up a medaCy model. In summary training
 import os
 from medacy.data import Dataset
 from medacy.pipelines import ClinicalPipeline
-from medacy.model import Model
+from medacy.ner import Model
 
 entities = ['Drug', 'Strength']
 
@@ -91,7 +91,7 @@ The `ClinicalPipeline` source looks like this:
 import spacy, sklearn_crfsuite
 from .base import BasePipeline
 from ..pipeline_components import ClinicalTokenizer
-from medacy.model.feature_extractor import FeatureExtractor
+from medacy.pipeline_components.feature_extraction.discrete_feature_extractor import FeatureExtractor
 
 from ..pipeline_components import GoldAnnotatorComponent, MetaMapComponent, UnitComponent, MetaMap
 
diff --git a/examples/guide/model_utilization.md b/examples/guide/model_utilization.md
index 3d0e1482..b6fb00b2 100644
--- a/examples/guide/model_utilization.md
+++ b/examples/guide/model_utilization.md
@@ -9,7 +9,7 @@ Once a CRF model has been trained and saved to disk, it can be loaded again for
 
 ```python
 from medacy.pipelines import ClinicalPipeline
-from medacy.model import Model
+from medacy.ner import Model
 
 pipeline = ClinicalPipeline(metamap=None, entities=['Drug'])
 model = Model(pipeline)
@@ -30,7 +30,7 @@ Once a model has been [packaged](packaging_a_medacy_model.md) and installed it c
 
 ```python
 import medacy_model_clinical_notes #import the python package wrapping the model
-from medacy.model import Model
+from medacy.ner import Model
 
 model = Model.load_external('medacy_model_clinical_notes')
 
diff --git a/examples/scripts/training_predicting.py b/examples/scripts/training_predicting.py
index c906580d..8de05973 100644
--- a/examples/scripts/training_predicting.py
+++ b/examples/scripts/training_predicting.py
@@ -4,7 +4,7 @@
 # it's own directory along the models build log and model/pipeline parameters to keep results easily referencable during run time.
 # Once a sufficent model is produced, consider wrapping it up into a medaCy compatible model as defined the example guide.
 
-from medacy.model import Model
+from medacy.ner import Model
 from medacy.pipelines import SystematicReviewPipeline
 from medacy.data import Dataset
 from medacy.pipeline_components import MetaMap
diff --git a/medacy/__init__.py b/medacy/__init__.py
index 535d6213..d1f710d0 100644
--- a/medacy/__init__.py
+++ b/medacy/__init__.py
@@ -1,2 +1,2 @@
-__version__ = '0.0.9'
+__version__ = '0.1.0'
 __authors__ = "Andriy Mulyar, Corey Sutphin, Bobby Best, Steele Farnsworth, Bridget McInnes"
diff --git a/medacy/data/dataset.py b/medacy/data/dataset.py
index 0ae34968..3181c619 100644
--- a/medacy/data/dataset.py
+++ b/medacy/data/dataset.py
@@ -92,6 +92,10 @@ def __init__(self, data_directory,
         """
         Manages directory of training data along with other medaCy generated files.
 
+        Only text files: considers a directory for managing metamapping.
+        Only ann files: considers a directory of predictions.
+        Both text and ann files: considers a directory for training.
+
         :param data_directory: Directory containing data for training or prediction.
         :param raw_text_file_extension: The file extension of raw text files in the data_directory (default: *.txt*)
         :param annotation_file_extension: The file extension of annotation files in the data_directory (default: *.ann*)
@@ -112,42 +116,59 @@ def __init__(self, data_directory,
         # start by filtering all raw_text files, both training and prediction directories will have these
         raw_text_files = sorted([file for file in all_files_in_directory if file.endswith(raw_text_file_extension)])
 
-        if raw_text_files is None:
-            raise ValueError("No raw text files exist in directory: %s" % self.data_directory)
 
-        if data_limit is not None:
-            self.data_limit = data_limit
-        else:
-            self.data_limit = len(raw_text_files)
+        if not raw_text_files: #detected a prediction directory
+            ann_files = sorted([file for file in all_files_in_directory if file.endswith(annotation_file_extension)])
+            self.is_training_directory = False
 
-        if self.data_limit < 1 or self.data_limit > len(raw_text_files):
-            raise ValueError("Parameter 'data_limit' must be between 1 and number of raw text files in data_directory")
+            if data_limit is not None:
+                self.data_limit = data_limit
+            else:
+                self.data_limit = len(ann_files)
 
-        # required ann files for this to be a training directory
-        ann_files = [file.replace(".%s" % raw_text_file_extension, ".%s" % annotation_file_extension) for file in
-                     raw_text_files]
+            for file in ann_files:
+                annotation_path = os.path.join(data_directory, file)
+                file_name = file[:-len(annotation_file_extension) - 1]
+                self.all_data_files.append(DataFile(file_name, None, annotation_path))
 
-        # only a training directory if every text file has a corresponding ann_file
-        self.is_training_directory = all([os.path.isfile(os.path.join(data_directory, ann_file)) for ann_file in ann_files])
 
-        # set all file attributes except metamap_path as it is optional.
-        for file in raw_text_files:
-            file_name = file[:-len(raw_text_file_extension) - 1]
-            raw_text_path = os.path.join(data_directory, file)
+        else: #detected a training directory (raw text files exist)
 
-            if self.is_training_directory:
-                annotation_path = os.path.join(data_directory, file.replace(".%s" % raw_text_file_extension,
-                                                                     ".%s" % annotation_file_extension))
+            if data_limit is not None:
+                self.data_limit = data_limit
             else:
-                annotation_path = None
-            self.all_data_files.append(DataFile(file_name, raw_text_path, annotation_path))
+                self.data_limit = len(raw_text_files)
 
-        #If directory is already metamapped, use it.
-        if self.is_metamapped():
-            for data_file in self.all_data_files:
-                data_file.metamapped_path = os.path.join(self.metamapped_files_directory,
-                                                         data_file.raw_path.split(os.path.sep)[-1]
-                                                         .replace(".%s" % self.raw_text_file_extension, ".metamapped"))
+            if self.data_limit < 1 or self.data_limit > len(raw_text_files):
+                raise ValueError(
+                    "Parameter 'data_limit' must be between 1 and number of raw text files in data_directory")
+
+            # required ann files for this to be a training directory
+            ann_files = [file.replace(".%s" % raw_text_file_extension, ".%s" % annotation_file_extension) for file
+                         in
+                         raw_text_files]
+            # only a training directory if every text file has a corresponding ann_file
+            self.is_training_directory = all([os.path.isfile(os.path.join(data_directory, ann_file)) for ann_file in ann_files])
+
+
+            # set all file attributes except metamap_path as it is optional.
+            for file in raw_text_files:
+                file_name = file[:-len(raw_text_file_extension) - 1]
+                raw_text_path = os.path.join(data_directory, file)
+
+                if self.is_training_directory:
+                    annotation_path = os.path.join(data_directory, file.replace(".%s" % raw_text_file_extension,
+                                                                         ".%s" % annotation_file_extension))
+                else:
+                    annotation_path = None
+                self.all_data_files.append(DataFile(file_name, raw_text_path, annotation_path))
+
+            #If directory is already metamapped, use it.
+            if self.is_metamapped():
+                for data_file in self.all_data_files:
+                    data_file.metamapped_path = os.path.join(self.metamapped_files_directory,
+                                                             data_file.raw_path.split(os.path.sep)[-1]
+                                                             .replace(".%s" % self.raw_text_file_extension, ".metamapped"))
 
 
     def get_data_files(self):
@@ -330,7 +351,7 @@ def compute_confusion_matrix(self, dataset, leniency=0):
             raise ValueError("dataset must be instance of Dataset")
 
         #verify files are consistent
-        diff = set([file.ann_path for file in self]).difference(set([file.ann_path for file in dataset]))
+        diff = set([file.ann_path.split(os.sep)[-1] for file in self]).difference(set([file.ann_path.split(os.sep)[-1] for file in dataset]))
         if diff:
             raise ValueError("Dataset of predictions is missing the files: "+str(list(diff)))
 
@@ -362,13 +383,14 @@ def compute_ambiguity(self, dataset):
         of a model's in-ability to dis-ambiguate between entities. For a full analysis, compute a confusion matrix.
 
         :param dataset: a Dataset object containing a predicted version of this dataset.
+        :param leniency: a floating point value between [0,1] defining the leniency of the character spans to count as different. A value of zero considers only exact character matches while a positive value considers entities that differ by up to :code:`ceil(leniency * len(span)/2)` on either side.
         :return: a dictionary containing the ambiguity computations on each gold, predicted file pair
         """
         if not isinstance(dataset, Dataset):
             raise ValueError("dataset must be instance of Dataset")
 
         # verify files are consistent
-        diff = set([file.ann_path for file in self]).difference(set([file.ann_path for file in dataset]))
+        diff = set([file.ann_path.split(os.sep)[-1] for file in self]).difference(set([file.ann_path.split(os.sep)[-1] for file in dataset]))
         if diff:
             raise ValueError("Dataset of predictions is missing the files: " + str(list(diff)))
 
diff --git a/medacy/ner/__init__.py b/medacy/ner/__init__.py
new file mode 100644
index 00000000..0ac52b69
--- /dev/null
+++ b/medacy/ner/__init__.py
@@ -0,0 +1 @@
+from .model.model import Model
diff --git a/medacy/model/__init__.py b/medacy/ner/model/__init__.py
similarity index 62%
rename from medacy/model/__init__.py
rename to medacy/ner/model/__init__.py
index eaaa1316..b26bfe83 100644
--- a/medacy/model/__init__.py
+++ b/medacy/ner/model/__init__.py
@@ -1,3 +1,2 @@
 from .model import Model
-from .feature_extractor import FeatureExtractor
 from .stratified_k_fold import SequenceStratifiedKFold
\ No newline at end of file
diff --git a/medacy/model/_model.py b/medacy/ner/model/_model.py
similarity index 100%
rename from medacy/model/_model.py
rename to medacy/ner/model/_model.py
diff --git a/medacy/model/model.py b/medacy/ner/model/model.py
similarity index 90%
rename from medacy/model/model.py
rename to medacy/ner/model/model.py
index a60f073c..2acc372c 100644
--- a/medacy/model/model.py
+++ b/medacy/ner/model/model.py
@@ -5,7 +5,7 @@
 import logging, os, joblib, time, importlib
 from medacy.data import Dataset
 from .stratified_k_fold import SequenceStratifiedKFold
-from medacy.pipelines.base.base_pipeline import BasePipeline
+from medacy.ner.pipelines import BasePipeline
 from pathos.multiprocessing import ProcessingPool as Pool, cpu_count
 from ._model import predict_document, construct_annotations_from_tuples
 from sklearn_crfsuite import metrics
@@ -126,17 +126,27 @@ def predict(self, dataset, prediction_directory = None):
             annotations = predict_document(model, doc, medacy_pipeline)
             return annotations
 
-    def cross_validate(self, num_folds=10, dataset=None, write_predictions=False):
+    def cross_validate(self, num_folds=10, training_dataset=None, prediction_directory=None):
         """
         Performs k-fold stratified cross-validation using our model and pipeline.
 
+        If the training dataset and prediction_directory are passed, intermediate predictions during cross validation
+        are written to the directory `prediction_directory`. This allows one to construct a confusion matrix or to compute
+        the prediction ambiguity with the methods present in the Dataset class to support pipeline development without
+        a designated evaluation set.
+
         :param num_folds: number of folds to split training data into for cross validation
-        :param dataset: Dataset that sequences were extracted from
-        :return: Prints out performance metrics
+        :param training_dataset: Dataset that is being cross validated (optional)
+        :param prediction_directory: directory to write predictions of cross validation to or `True` for default predictions sub-directory.
+        :return: Prints out performance metrics; if prediction_directory is given, also returns a Dataset of the written predictions
         """
 
         if num_folds <= 1: raise ValueError("Number of folds for cross validation must be greater than 1")
 
+        if prediction_directory is not None and training_dataset is None:
+            raise ValueError("Cannot generated predictions during cross validation if training dataset is not given."
+                             " Please pass the training dataset in the 'training_dataset' parameter.")
+
         assert self.model is not None, "Cannot cross validate a un-fit model"
         assert self.X_data is not None and self.y_data is not None, \
             "Must have features and labels extracted for cross validation"
@@ -168,7 +178,7 @@ def cross_validate(self, num_folds=10, dataset=None, write_predictions=False):
             learner.fit(train_data, y_train)
             y_pred = learner.predict(test_data)
 
-            if write_predictions:
+            if prediction_directory is not None:
                 # Dict for storing mapping of sequences to their corresponding file
                 preds_by_document = {filename: [] for filename in list(set([x[2] for x in X_data]))}
 
@@ -264,16 +274,24 @@ def cross_validate(self, num_folds=10, dataset=None, write_predictions=False):
         logging.info("\n"+tabulate(table_data, headers=['Entity', 'Precision', 'Recall', 'F1', 'F1_Min', 'F1_Max'],
                        tablefmt='orgtbl'))
 
-        if write_predictions:
+        if prediction_directory:
             # Write annotations generated from cross-validation
-            prediction_directory = dataset.data_directory + "/predictions/"
-            for data_file in dataset.get_data_files():
+            if isinstance(prediction_directory, str):
+                prediction_directory = prediction_directory
+            else:
+                prediction_directory = training_dataset.data_directory + "/predictions/"
+            if os.path.isdir(prediction_directory):
+                logging.warning("Overwritting existing predictions")
+            else:
+                os.makedirs(prediction_directory)
+            for data_file in training_dataset.get_data_files():
                 logging.info("Predicting file: %s", data_file.file_name)
                 with open(data_file.raw_path, 'r') as raw_text:
                     doc = medacy_pipeline.spacy_pipeline.make_doc(raw_text.read())
                     preds = preds_by_document[data_file.file_name]
                     annotations = construct_annotations_from_tuples(doc, preds)
                     annotations.to_ann(write_location=os.path.join(prediction_directory, data_file.file_name + ".ann"))
+            return Dataset(data_directory=prediction_directory)
 
     def _extract_features(self, data_file, medacy_pipeline, is_metamapped):
         """
@@ -378,4 +396,3 @@ def load_external(package_name):
 
     def __str__(self):
         return self.get_info()
-
diff --git a/medacy/model/stratified_k_fold.py b/medacy/ner/model/stratified_k_fold.py
similarity index 100%
rename from medacy/model/stratified_k_fold.py
rename to medacy/ner/model/stratified_k_fold.py
diff --git a/medacy/pipelines/__init__.py b/medacy/ner/pipelines/__init__.py
similarity index 71%
rename from medacy/pipelines/__init__.py
rename to medacy/ner/pipelines/__init__.py
index 04e07315..162442e9 100644
--- a/medacy/pipelines/__init__.py
+++ b/medacy/ner/pipelines/__init__.py
@@ -2,4 +2,5 @@
 from .systematic_review_pipeline import SystematicReviewPipeline
 from .fda_nano_drug_label_pipeline import FDANanoDrugLabelPipeline
 from .drug_event_pipeline import DrugEventPipeline
-from .testing_pipeline import TestingPipeline
\ No newline at end of file
+from .testing_pipeline import TestingPipeline
+from .base.base_pipeline import BasePipeline
diff --git a/medacy/pipelines/base/__init__.py b/medacy/ner/pipelines/base/__init__.py
similarity index 100%
rename from medacy/pipelines/base/__init__.py
rename to medacy/ner/pipelines/base/__init__.py
diff --git a/medacy/pipelines/base/base_pipeline.py b/medacy/ner/pipelines/base/base_pipeline.py
similarity index 98%
rename from medacy/pipelines/base/base_pipeline.py
rename to medacy/ner/pipelines/base/base_pipeline.py
index 894b385d..1d04b3df 100644
--- a/medacy/pipelines/base/base_pipeline.py
+++ b/medacy/ner/pipelines/base/base_pipeline.py
@@ -1,5 +1,5 @@
 from abc import ABC, abstractmethod
-from ...pipeline_components.base import BaseComponent
+from medacy.pipeline_components.base import BaseComponent
 
 class BasePipeline(ABC):
     """
diff --git a/medacy/pipelines/clinical_pipeline.py b/medacy/ner/pipelines/clinical_pipeline.py
similarity index 88%
rename from medacy/pipelines/clinical_pipeline.py
rename to medacy/ner/pipelines/clinical_pipeline.py
index 073dc29d..cfef2cb0 100644
--- a/medacy/pipelines/clinical_pipeline.py
+++ b/medacy/ner/pipelines/clinical_pipeline.py
@@ -1,9 +1,9 @@
 import spacy, sklearn_crfsuite
 from .base import BasePipeline
-from ..pipeline_components import ClinicalTokenizer
-from medacy.model.feature_extractor import FeatureExtractor
+from medacy.pipeline_components import ClinicalTokenizer
+from medacy.pipeline_components.feature_extraction.discrete_feature_extractor import FeatureExtractor
 
-from ..pipeline_components import GoldAnnotatorComponent, MetaMapComponent, UnitComponent, MetaMap
+from medacy.pipeline_components import GoldAnnotatorComponent, MetaMapComponent, MetaMap
 
 
 class ClinicalPipeline(BasePipeline):
diff --git a/medacy/pipelines/drug_event_pipeline.py b/medacy/ner/pipelines/drug_event_pipeline.py
similarity index 89%
rename from medacy/pipelines/drug_event_pipeline.py
rename to medacy/ner/pipelines/drug_event_pipeline.py
index 7f2d4ee9..80a5c802 100644
--- a/medacy/pipelines/drug_event_pipeline.py
+++ b/medacy/ner/pipelines/drug_event_pipeline.py
@@ -1,10 +1,10 @@
 import spacy, sklearn_crfsuite
 from .base import BasePipeline
-from medacy.model.feature_extractor import FeatureExtractor
+from medacy.pipeline_components.feature_extraction.discrete_feature_extractor import FeatureExtractor
 
-from ..pipeline_components import GoldAnnotatorComponent, MetaMapComponent, CharacterTokenizer
-from ..pipeline_components.lexicon import LexiconComponent
-from ..pipeline_components.patterns import TableMatcherComponent
+from medacy.pipeline_components import GoldAnnotatorComponent, MetaMapComponent, CharacterTokenizer
+from medacy.pipeline_components.lexicon import LexiconComponent
+from medacy.pipeline_components.patterns import TableMatcherComponent
 
 class DrugEventPipeline(BasePipeline):
 
@@ -174,4 +174,4 @@ def get_tokenizer(self):
 
     def get_feature_extractor(self):
         extractor = FeatureExtractor(window_size=3, spacy_features=['pos_', 'shape_', 'prefix_', 'suffix_', 'like_num', 'text', 'head'])
-        return extractor
+        return extractor
\ No newline at end of file
diff --git a/medacy/pipelines/fda_nano_drug_label_pipeline.py b/medacy/ner/pipelines/fda_nano_drug_label_pipeline.py
similarity index 88%
rename from medacy/pipelines/fda_nano_drug_label_pipeline.py
rename to medacy/ner/pipelines/fda_nano_drug_label_pipeline.py
index a5b014b1..d663c58d 100644
--- a/medacy/pipelines/fda_nano_drug_label_pipeline.py
+++ b/medacy/ner/pipelines/fda_nano_drug_label_pipeline.py
@@ -1,9 +1,9 @@
 import spacy, sklearn_crfsuite
 from .base import BasePipeline
-from ..pipeline_components import SystematicReviewTokenizer, ClinicalTokenizer
-from medacy.model.feature_extractor import FeatureExtractor
+from medacy.pipeline_components import ClinicalTokenizer
+from medacy.pipeline_components.feature_extraction.discrete_feature_extractor import FeatureExtractor
 
-from ..pipeline_components import GoldAnnotatorComponent, MetaMapComponent, UnitComponent
+from medacy.pipeline_components import GoldAnnotatorComponent, MetaMapComponent
 
 
 class FDANanoDrugLabelPipeline(BasePipeline):
diff --git a/medacy/pipelines/systematic_review_pipeline.py b/medacy/ner/pipelines/systematic_review_pipeline.py
similarity index 88%
rename from medacy/pipelines/systematic_review_pipeline.py
rename to medacy/ner/pipelines/systematic_review_pipeline.py
index f007e850..84a797e9 100644
--- a/medacy/pipelines/systematic_review_pipeline.py
+++ b/medacy/ner/pipelines/systematic_review_pipeline.py
@@ -1,9 +1,9 @@
 import spacy, sklearn_crfsuite
 from .base import BasePipeline
-from ..pipeline_components import MetaMap, SystematicReviewTokenizer
-from medacy.model.feature_extractor import FeatureExtractor
+from medacy.pipeline_components import MetaMap, SystematicReviewTokenizer
+from medacy.pipeline_components.feature_extraction.discrete_feature_extractor import FeatureExtractor
 
-from ..pipeline_components import GoldAnnotatorComponent, MetaMapComponent, UnitComponent
+from medacy.pipeline_components import GoldAnnotatorComponent, MetaMapComponent
 
 
 class SystematicReviewPipeline(BasePipeline):
diff --git a/medacy/pipelines/testing_pipeline.py b/medacy/ner/pipelines/testing_pipeline.py
similarity index 87%
rename from medacy/pipelines/testing_pipeline.py
rename to medacy/ner/pipelines/testing_pipeline.py
index 471abb77..57dc661b 100644
--- a/medacy/pipelines/testing_pipeline.py
+++ b/medacy/ner/pipelines/testing_pipeline.py
@@ -1,9 +1,9 @@
 import spacy, sklearn_crfsuite
 from .base import BasePipeline
-from ..pipeline_components import ClinicalTokenizer
-from medacy.model.feature_extractor import FeatureExtractor
+from medacy.pipeline_components import ClinicalTokenizer
+from medacy.pipeline_components.feature_extraction.discrete_feature_extractor import FeatureExtractor
 
-from ..pipeline_components import GoldAnnotatorComponent
+from medacy.pipeline_components import GoldAnnotatorComponent
 
 
 class TestingPipeline(BasePipeline):
diff --git a/medacy/pipeline_components/__init__.py b/medacy/pipeline_components/__init__.py
index ad2b6e22..f768d364 100644
--- a/medacy/pipeline_components/__init__.py
+++ b/medacy/pipeline_components/__init__.py
@@ -17,3 +17,6 @@
 from .units.time_unit_component import TimeUnitComponent
 from .units.frequency_unit_component import FrequencyUnitComponent
 from .units.measurement_unit_component import MeasurementUnitComponent
+
+
+from .feature_extraction.discrete_feature_extractor import FeatureExtractor
diff --git a/medacy/tests/pipelines/__init__.py b/medacy/pipeline_components/feature_extraction/__init__.py
similarity index 100%
rename from medacy/tests/pipelines/__init__.py
rename to medacy/pipeline_components/feature_extraction/__init__.py
diff --git a/medacy/model/feature_extractor.py b/medacy/pipeline_components/feature_extraction/discrete_feature_extractor.py
similarity index 100%
rename from medacy/model/feature_extractor.py
rename to medacy/pipeline_components/feature_extraction/discrete_feature_extractor.py
diff --git a/medacy/pipeline_components/patterns/table_matcher_component.py b/medacy/pipeline_components/patterns/table_matcher_component.py
index 4db91681..9a1ab75b 100644
--- a/medacy/pipeline_components/patterns/table_matcher_component.py
+++ b/medacy/pipeline_components/patterns/table_matcher_component.py
@@ -31,8 +31,9 @@ def __call__(self, doc):
         for match in re.finditer(TABLE_PATTERN, doc.text):
             start, end = match.span()
             span = doc.char_span(start, end)
+            if span is None:
+                continue
             for token in span:
                 token._.set('feature_is_from_table', True)
 
         return doc
-
diff --git a/medacy/relation/__init__.py b/medacy/relation/__init__.py
new file mode 100644
index 00000000..0ac52b69
--- /dev/null
+++ b/medacy/relation/__init__.py
@@ -0,0 +1 @@
+from .model.model import Model
diff --git a/medacy/tests/tools/con_form/__init__.py b/medacy/tests/ner/__init__.py
similarity index 100%
rename from medacy/tests/tools/con_form/__init__.py
rename to medacy/tests/ner/__init__.py
diff --git a/medacy/tools/con_form/__init__.py b/medacy/tests/ner/model/__init__.py
similarity index 100%
rename from medacy/tools/con_form/__init__.py
rename to medacy/tests/ner/model/__init__.py
diff --git a/medacy/tests/model/test_model_prediction.py b/medacy/tests/ner/model/test_model_prediction.py
similarity index 95%
rename from medacy/tests/model/test_model_prediction.py
rename to medacy/tests/ner/model/test_model_prediction.py
index fa8fe20e..d06d90de 100644
--- a/medacy/tests/model/test_model_prediction.py
+++ b/medacy/tests/ner/model/test_model_prediction.py
@@ -1,6 +1,6 @@
 from unittest import TestCase
-from medacy.model import Model
-from medacy.pipelines import TestingPipeline
+from medacy.ner.model import Model
+from medacy.ner.pipelines import TestingPipeline
 from medacy.tools import Annotations
 from medacy.data import Dataset
 import os, importlib, pkg_resources, tempfile, shutil
diff --git a/medacy/tests/ner/pipelines/__init__.py b/medacy/tests/ner/pipelines/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/medacy/tests/pipelines/test_clinical_pipeline.py b/medacy/tests/ner/pipelines/test_clinical_pipeline.py
similarity index 92%
rename from medacy/tests/pipelines/test_clinical_pipeline.py
rename to medacy/tests/ner/pipelines/test_clinical_pipeline.py
index 28490c6d..63a494d6 100644
--- a/medacy/tests/pipelines/test_clinical_pipeline.py
+++ b/medacy/tests/ner/pipelines/test_clinical_pipeline.py
@@ -1,5 +1,5 @@
 from unittest import TestCase
-from medacy.pipelines import ClinicalPipeline
+from medacy.ner.pipelines import ClinicalPipeline
 from medacy.pipeline_components import GoldAnnotatorComponent, MetaMap
 
 
diff --git a/medacy/tests/tools/__init__.py b/medacy/tests/tools/__init__.py
index 6253b287..d0799406 100644
--- a/medacy/tests/tools/__init__.py
+++ b/medacy/tests/tools/__init__.py
@@ -1 +1 @@
-from .con_test_data.con_test import con_text, source_text
+from medacy.tests.tools.converters.con_test_data.con_test import con_text, source_text
diff --git a/medacy/tests/tools/converters/__init__.py b/medacy/tests/tools/converters/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/medacy/tests/tools/converters/con_test_data/__init__.py b/medacy/tests/tools/converters/con_test_data/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/medacy/tests/tools/con_test_data/con_test.py b/medacy/tests/tools/converters/con_test_data/con_test.py
similarity index 100%
rename from medacy/tests/tools/con_test_data/con_test.py
rename to medacy/tests/tools/converters/con_test_data/con_test.py
diff --git a/medacy/tests/tools/converters/conversion_tools/test_line.py b/medacy/tests/tools/converters/conversion_tools/test_line.py
new file mode 100644
index 00000000..a6409b12
--- /dev/null
+++ b/medacy/tests/tools/converters/conversion_tools/test_line.py
@@ -0,0 +1,45 @@
+import unittest
+from medacy.tools.converters.conversion_tools.line import Line
+
+
+# Sample text must be on the lowest level of indentation so that
+# the indentation is not counted towards the indices.
+
+sample_text_1 = """ABELCET (Amphotericin B Lipid Complex Injection)DESCRIPTIONABELCET  is a sterile, pyrogen-free suspension for intravenous infusion.
+ABELCET  consists of ampho-tericin B complexed with two phospholipids in a 1:1 drug-to-lipid molar ratio.
+The two phospholipids,L-&#x3b1;-dimyristoylphosphatidylcholine (DMPC) and L-&#x3b1;-dimyristoylphosphatidylglycerol (DMPG), are pre-sent in a 7:3 molar ratio.
+ABELCET  is yellow and opaque in appearance, with a pH of 5 - 7.
+NOTE: Liposomal encapsulation or incorporation in a lipid complex can substantially affect adrug's functional properties relative to those of the unencapsulated or nonlipid-associated drug.
+Inaddition, different liposomal or lipid-complexed products with a common active ingredient mayvary from one another in the chemical composition and physical form of the lipid component.
+Suchdifferences may affect functional properties of these drug products.Amphotericin B is a polyene, antifungal antibiotic produced from a strain of Streptomyces nodosus.Amphotericin B is designated chemically as [1R-(1R*, 3S*, 5R*, 6R*, 9R*, 11R*, 15S*, 16R*, 17R*,18S*, 19E, 21E, 23E, 25E, 27E, 29E, 31E, 33R*, 35S*, 36R*, 37S*)]-33-[(3-Amino-3, 6- D-mannopyranosyl) oxy]-1,3,5,6,9,11,17,37-octahydroxy-15,16,18-trimethyl-13-oxo-14,39-dioxabicy-clo[33.3.1] nonatriaconta-19, 21, 23, 25, 27, 29, 31-heptaene-36-carboxylic acid.
+It has a molecular weight of 924.09 and a molecular formula of C47H73NO17.
+The structural formula is:
+ABELCET  is provided as a sterile, opaque suspension in 20 mL glass, single-use vials."""
+
+sample_text_2 = """This is the first sample line
+This is the second line
+Also this line
+This is another line
+Also this line
+The previous line is a repeat on purpose
+Also this line
+This is so much fun"""
+
+
+class TestLine(unittest.TestCase):
+    """Unit tests for line.py"""
+
+    def test_init_lines_no_repeats(self):
+        """Test that indices are accurate when there are no repeated lines."""
+        text_lines = sample_text_1.split('\n')
+        line_objs = Line.init_lines(sample_text_1)
+        expected = [sample_text_1.index(line) for line in text_lines]
+        actual = [line.index for line in line_objs]
+        self.assertListEqual(actual, expected)
+
+    def test_init_lines_with_repeats(self):
+        """Test that indices are accurate even when lines are repeated."""
+        line_objs = Line.init_lines(sample_text_2)
+        expected = [0, 30, 54, 69, 90, 105, 146, 161]
+        actual = [line.index for line in line_objs]
+        self.assertListEqual(actual, expected)
diff --git a/medacy/tests/tools/con_form/test_brat_to_con.py b/medacy/tests/tools/converters/test_brat_to_con.py
similarity index 92%
rename from medacy/tests/tools/con_form/test_brat_to_con.py
rename to medacy/tests/tools/converters/test_brat_to_con.py
index ed2feb06..d5521b85 100644
--- a/medacy/tests/tools/con_form/test_brat_to_con.py
+++ b/medacy/tests/tools/converters/test_brat_to_con.py
@@ -1,10 +1,10 @@
 """
 :author: Steele W. Farnsworth
-:date: 28 December, 2018
+:date: 13 March, 2019
 """
 
-import unittest, tempfile, os, shutil
-from medacy.tools.con_form.brat_to_con import *
+import unittest, tempfile
+from medacy.tools.converters.brat_to_con import *
 
 brat_text = """T1	tradename 0 7	ABELCET
 T2	activeingredient 9 23	Amphotericin B
@@ -92,33 +92,35 @@ def setUpClass(cls):
 
         cls.output_file_path = os.path.join(cls.test_dir, "output_file.txt")
 
+        cls.lines = Line.init_lines(source_text)
+
     @classmethod
     def tearDownClass(cls):
         shutil.rmtree(cls.test_dir)
 
-    def is_valid_brat_valid_1(self):
+    def test_is_valid_brat_valid_1(self):
         """Tests that when is_valid_brat() gets called on a valid line without a new line character, it returns True."""
         sample = "T3	nanoparticle 24 37	Lipid Complex"
         result = is_valid_brat(sample)
         self.assertTrue(result)
 
-    def is_valid_brat_valid_2(self):
+    def test_is_valid_brat_valid_2(self):
         """Tests that when is_valid_brat() is called on a valid line with a new line character, it returns True."""
         sample = "T12	nanoparticle 674 683	liposomal\n"
         result = is_valid_brat(sample)
         self.assertTrue(result)
 
-    def is_valid_brat_invalid_1(self):
+    def test_is_valid_brat_invalid_1(self):
         """Tests what when is_valid_brat() is called on an invalid line without a new line character, it returns False."""
         sample = "T3	nanoparticle s 37	Lipid Complex"
         result = is_valid_brat(sample)
         self.assertFalse(result)
 
-    def is_valid_brat_invalid_2(self):
+    def test_is_valid_brat_invalid_2(self):
         """Tests what when is_valid_brat() is called on an invalid line with a new line character, it returns False."""
         sample = "T12 674 683	liposomal\n"
         result = is_valid_brat(sample)
-        self.assertTrue(result)
+        self.assertFalse(result)
 
     def test_line_to_dict(self):
         """Tests that line_to_dict() accurately converts a line of input text to an expected dict format."""
@@ -142,10 +144,10 @@ def test_get_word_num_1(self):
         """
         # The annotation used is "T5	tradename 132 139	ABELCET"
         sample_line = "ABELCET  consists of ampho-tericin B complexed with two phospholipids in a 1:1 drug-to-lipid molar ratio."
-        line_index = get_line_index(source_text, sample_line)
+        this_line = self.lines[1]
         expected = 0
-        actual = get_word_num(source_text, line_index, 132)
-        self.assertEqual(expected, actual)
+        actual = get_word_num(this_line, 132)
+        self.assertEqual(actual, expected)
 
     def test_get_word_num_2(self):
         """
@@ -154,17 +156,16 @@ def test_get_word_num_2(self):
         """
         # The annotation used is "T16	activeingredient 1009 1023	Amphotericin B"
         sample_line = "Suchdifferences may affect functional properties of these drug products.Amphotericin B is a polyene, antifungal antibiotic produced from a strain of Streptomyces nodosus.Amphotericin B is designated chemically as [1R-(1R*, 3S*, 5R*, 6R*, 9R*, 11R*, 15S*, 16R*, 17R*,18S*, 19E, 21E, 23E, 25E, 27E, 29E, 31E, 33R*, 35S*, 36R*, 37S*)]-33-[(3-Amino-3, 6- D-mannopyranosyl) oxy]-1,3,5,6,9,11,17,37-octahydroxy-15,16,18-trimethyl-13-oxo-14,39-dioxabicy-clo[33.3.1] nonatriaconta-19, 21, 23, 25, 27, 29, 31-heptaene-36-carboxylic acid."
-        line_index = get_line_index(source_text, sample_line)
+        this_line = self.lines[6]
         expected = 21
-        actual = get_word_num(source_text, line_index, 1009)
+        actual = get_word_num(this_line, 1009)
         self.assertEqual(expected, actual)
-        
-    @unittest.skip("Not currently working")
+
     def test_valid_brat_to_con(self):
         """Convert the test file from brat to con. Assert that the con output matches the sample con text."""
         con_output = convert_brat_to_con(self.brat_file_path, self.text_file_path)
-        self.assertEqual(con_output, con_text)
-    
+        self.assertEqual(con_text, con_output)
+
     def test_invalid_file_path(self):
         """Passes an invalid file path to convert_brat_to_con()."""
         with self.assertRaises(FileNotFoundError):
@@ -175,7 +176,7 @@ def test_valid_brat_matching_text_name(self):
         Assert that the con output matches the sample con text when the automatic text-file-finding feature is utilized
         """
         con_output = convert_brat_to_con(self.brat_file_path)
-        self.assertEqual(con_output, con_text)
+        self.assertEqual(con_text, con_output)
 
     def test_invalid_brat_text(self):
         """Assert that invalid brat text produces no output."""
diff --git a/medacy/tests/tools/con_form/test_con_to_brat.py b/medacy/tests/tools/converters/test_con_to_brat.py
similarity index 95%
rename from medacy/tests/tools/con_form/test_con_to_brat.py
rename to medacy/tests/tools/converters/test_con_to_brat.py
index 1bc0703d..3e3d0e2b 100644
--- a/medacy/tests/tools/con_form/test_con_to_brat.py
+++ b/medacy/tests/tools/converters/test_con_to_brat.py
@@ -1,10 +1,10 @@
 """
 :author: Steele W. Farnsworth
-:date: 17 February, 2019
+:date: 13 March, 2019
 """
 
-import unittest, tempfile, os, shutil
-from medacy.tools.con_form.con_to_brat import *
+import unittest, tempfile
+from medacy.tools.converters.con_to_brat import *
 
 brat_text = """T1	tradename 0 7	ABELCET
 T2	activeingredient 9 23	Amphotericin B
@@ -126,25 +126,25 @@ def test_line_to_dict(self):
         expected = {"data_item": "Amphotericin B", "start_ind": "7:8", "end_ind": "7:9", "data_type": "activeingredient"}
         actual = line_to_dict(sample)
         self.assertDictEqual(expected, actual)
-        
-    @unittest.skip("Not currently working")
+
+    @unittest.skip("Not currently working")
     def test_valid_brat_to_con(self):
         """Convert the test file from brat to con. Assert that the con output matches the sample con text."""
         brat_output = convert_con_to_brat(self.con_file_path, self.text_file_path)
-        self.assertEqual(brat_output, brat_text)
+        self.assertEqual(brat_text, brat_output)
 
     def test_invalid_file_path(self):
         """Passes an invalid file path to convert_con_to_brat()."""
         with self.assertRaises(FileNotFoundError):
             convert_con_to_brat("this isn't a valid file path", "neither is this")
-    
-    @unittest.skip("Not currently working")
+
+    @unittest.skip("Not currently working")
     def test_valid_con_matching_text_name(self):
         """
         Assert that the con output matches the sample con text when the automatic text-file-finding feature is utilized
         """
         brat_output = convert_con_to_brat(self.con_file_path)
-        self.assertEqual(brat_output, brat_text)
+        self.assertEqual(brat_text, brat_output)
 
     def test_invalid_brat_text(self):
         """Assert that invalid brat text produces no output."""
diff --git a/medacy/tests/tools/test_annotation.py b/medacy/tests/tools/test_annotation.py
index cbcf1dc7..48bd385d 100644
--- a/medacy/tests/tools/test_annotation.py
+++ b/medacy/tests/tools/test_annotation.py
@@ -2,8 +2,8 @@
 from unittest import TestCase
 from medacy.data import Dataset
 from medacy.tools import Annotations, InvalidAnnotationError
-from os.path import join, isfile
-from medacy.tests.tools.con_test_data.con_test import con_text, source_text as con_source_text
+from os.path import join
+from medacy.tests.tools.converters.con_test_data.con_test import con_text, source_text as con_source_text
 
 
 class TestAnnotation(TestCase):
diff --git a/medacy/tools/__init__.py b/medacy/tools/__init__.py
index 95516efd..5d95883f 100644
--- a/medacy/tools/__init__.py
+++ b/medacy/tools/__init__.py
@@ -1,3 +1,3 @@
 from .annotations import Annotations, InvalidAnnotationError
 from .data_file import DataFile
-from .con_form import brat_to_con, con_to_brat
+from medacy.tools.converters import brat_to_con, con_to_brat
diff --git a/medacy/tools/annotations.py b/medacy/tools/annotations.py
index 8f48232e..1076bad5 100644
--- a/medacy/tools/annotations.py
+++ b/medacy/tools/annotations.py
@@ -9,8 +9,8 @@
 elements correspond to keys in the 'entities' dictionary.
 """
 import os, logging, tempfile
-from medacy.tools.con_form.con_to_brat import convert_con_to_brat
-from medacy.tools.con_form.brat_to_con import convert_brat_to_con
+from medacy.tools.converters.con_to_brat import convert_con_to_brat
+from medacy.tools.converters.brat_to_con import convert_brat_to_con
 from math import floor, ceil
 import numpy as np
 from spacy.displacy import EntityRenderer
diff --git a/medacy/tools/converters/__init__.py b/medacy/tools/converters/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/medacy/tools/ade_to_brat.py b/medacy/tools/converters/ade_to_brat.py
similarity index 100%
rename from medacy/tools/ade_to_brat.py
rename to medacy/tools/converters/ade_to_brat.py
diff --git a/medacy/tools/con_form/brat_to_con.py b/medacy/tools/converters/brat_to_con.py
similarity index 75%
rename from medacy/tools/con_form/brat_to_con.py
rename to medacy/tools/converters/brat_to_con.py
index 25ac8b61..e8fe0b40 100644
--- a/medacy/tools/con_form/brat_to_con.py
+++ b/medacy/tools/converters/brat_to_con.py
@@ -3,31 +3,40 @@
 Each '.ann' file must have a '.txt' file in the same directory with the same name, minus the extension.
 Use '-c' (without quotes) as an optional final command-line argument to copy the text files used
 in the conversion process to the output directory.
+
 Also possible to import 'convert_brat_to_con()' directly and pass the paths to the ann and txt files
 for individual conversion.
 
 :author: Steele W. Farnsworth
-:date: 16 February, 2019
+:date: 13 March, 2019
 """
 
 from sys import argv
 from re import split, fullmatch, DOTALL, findall
+from medacy.tools.converters.conversion_tools.line import Line
+import re
 import os
 import shutil
 import logging
+import tabulate
 
 
 # A regex pattern for consecutive whitespace other than a new line character
-whitespace_pattern = "( +|\t+)+"
+whitespace_pattern = re.compile("( +|\t+)+")
+# Regex pattern for BRAT T annotations
+brat_pattern_T = r"T\d+\t\S+ \d+ \d+\t.+"
+
+# Used for stats at the end
+num_lines = 0
+num_skipped_regex = 0
 
 
 def is_valid_brat(item: str):
     """Returns a boolean value for whether or not a given line is in the BRAT format."""
     # Define the regex pattern for BRAT.
     # Note that this pattern allows for three to six spaces to count as a tab
-    brat_pattern = r"[TREAMN]\d+(\t| {3,6})\S+ \d+ \d+(\t| {3,6}).+"
     if not isinstance(item, str): return False
-    if fullmatch(brat_pattern, item, DOTALL): return True
+    if fullmatch(brat_pattern_T, item, DOTALL): return True
     else: return False
 
 
@@ -41,7 +50,7 @@ def line_to_dict(item):
     split1 = split("\t", item)
     split2 = split(" ", split1[1])
     split3 = [split1[0]] + split2 + [split1[2]]
-    s = [i.rstrip() for i in split3]  # remove whitespace
+    s = [i.strip() for i in split3]  # remove whitespace
     return {"id_type": s[0][0], "id_num": int(s[0][1:]), "data_type": s[1], "start_ind": int(s[2]),
             "end_ind": int(s[3]), "data_item": s[4]}
 
@@ -54,12 +63,6 @@ def switch_extension(name, ext):
     return os.path.splitext(name)[0] + ext
 
 
-def get_line_index(text_, line_):
-    """Returns the index of the start of a given line. Assumes that the line_
-    argument is long enough that (and thus so specific that) it only occurs once."""
-    return text_.index(line_)
-
-
 def find_line_num(text_, start):
     """
     :param text_: The text of the file, ex. f.read()
@@ -69,19 +72,18 @@ def find_line_num(text_, start):
     return text_[:int(start)].count("\n")
 
 
-def get_word_num(text_, line_index, entity_index):
+def get_word_num(line_obj: Line, entity_index):
     """
-    Returns the word number starting at zero that a given BRAT entity start index is within its line.
-    In the previous line, "Returns" is word 0 and "starting" is word 4. Words are counted by the number of consecutive
-    white spaces.
-    :param text_: The text of the document that the word occurs in.
-    :param line_index: The index of the first char of the line the word occurs in.
-    :param entity_index: The index of the first char of the word relative to the start of the document.
-    :return: The word number (see above explanation for what a word number is) of the given index within its line.
+    Returns the word number relative to the start of the line, with counting starting at 0,
+    of the first word of the entity.
+    :param line_obj: The Line that the entity occurs in.
+    :param entity_index: The absolute index of the entity, given by the annotation.
+    :return: The word index of the entity.
     """
-    substring_before_entity = text_[line_index:entity_index]
+    index_within_line = entity_index - line_obj.index
+    substring_before_entity = line_obj.text[:index_within_line]
     matched_spaces = findall(whitespace_pattern, substring_before_entity)
-    return matched_spaces.__len__()
+    return len(matched_spaces)
 
 
 def convert_brat_to_con(brat_file_path, text_file_path=None):
@@ -94,6 +96,8 @@ def convert_brat_to_con(brat_file_path, text_file_path=None):
     :return: A string (not a file) of the con equivalent of the brat file.
     """
 
+    global num_lines, num_skipped_regex
+
     # By default, find txt file with equivalent name
     if text_file_path is None:
         text_file_path = switch_extension(brat_file_path, ".txt")
@@ -102,12 +106,12 @@ def convert_brat_to_con(brat_file_path, text_file_path=None):
                                     " directory")
         with open(text_file_path, 'r') as text_file:
             text = text_file.read()
-            text_lines = text.split('\n')
+            text_lines = Line.init_lines(text)
     # Otherwise open the file with the path passed to the function
     elif os.path.isfile(text_file_path):
         with open(text_file_path, 'r') as text_file:
             text = text_file.read()
-            text_lines = text.split('\n')
+            text_lines = Line.init_lines(text)
     else: raise FileNotFoundError("No text file path was provided or the file was not found."
                                   " Note that direct string input of the source text is not supported.")
 
@@ -129,25 +133,25 @@ def convert_brat_to_con(brat_file_path, text_file_path=None):
             continue
         elif not is_valid_brat(line):
             logging.warning("Incorrectly formatted line in %s was skipped: \"%s\"." % (brat_file_path, line))
+            num_skipped_regex += 1
             continue
 
         d = line_to_dict(line)
 
         start_line_num = find_line_num(text, d["start_ind"])
-        start_text_line = text_lines[start_line_num]
-        start_line_index = get_line_index(text, start_text_line)
-        start_word_num = get_word_num(text, start_line_index, d["start_ind"])
+        start_source_line = text_lines[start_line_num]
+        start_word_num = get_word_num(start_source_line, d["start_ind"])
         start_str = str(start_line_num + 1) + ':' + str(start_word_num)
 
         end_line_num = find_line_num(text, d["end_ind"])
-        end_text_line = text_lines[end_line_num]
-        end_line_index = get_line_index(text, end_text_line)
-        end_word_num = get_word_num(text, end_line_index, d["end_ind"])
+        end_word_num = start_word_num + len(re.findall(whitespace_pattern, d["data_item"]))
         end_str = str(end_line_num + 1) + ':' + str(end_word_num)
 
         con_line = "c=\"%s\" %s %s||t=\"%s\"\n" % (d["data_item"], start_str, end_str, d['data_type'])
         output_lines += con_line
 
+        num_lines += 1
+
     return output_lines
 
 
@@ -155,13 +159,11 @@ def convert_brat_to_con(brat_file_path, text_file_path=None):
 
     # Get the input and output directories from the command line.
 
-    if not argv.__len__() >= 3:
+    if len(argv) < 3:
         # Command-line arguments must be provided for the input and output directories.
-        # Else, prints instructions and aborts the program.
-        print("Please run the program again, entering the input and output directories as command-line arguments"
-              " in that order. Optionally, enter '-c' as a final command line argument if you want to copy"
-              " the text files used in the conversion over to the output directory.")
-        exit()
+        raise IOError("Please run the program again, entering the input and output directories as command-line"
+                      " arguments in that order. Optionally, enter '-c' as a final command line argument if you want"
+                      " to copy the text files used in the conversion over to the output directory.")
 
     try:
         input_dir_name = argv[1]
@@ -188,11 +190,10 @@ def convert_brat_to_con(brat_file_path, text_file_path=None):
         raise FileNotFoundError("There were no ann files in the input directory with a corresponding text file. "
                                 "Please ensure that the input directory contains ann files and that each file has "
                                 "a corresponding txt file (see help for this program).")
-        exit()
 
-    # Create the log file
-    log_file_path = os.path.join(output_dir_name + "conversion.log")
-    logging.basicConfig(filename=log_file_path, level=logging.WARNING)
+    # Create the log
+    log_path = os.path.join(output_dir_name, "conversion.log")
+    logging.basicConfig(filename=log_path, level=logging.WARNING)
 
     for input_file_name in ann_files:
         full_file_path = os.path.join(input_dir_name, input_file_name)
@@ -203,8 +204,22 @@ def convert_brat_to_con(brat_file_path, text_file_path=None):
 
     # Paste all the text files used in the conversion process to the output directory
     # if there's a fourth command line argument and that argument is -c
-    if argv.__len__() == 4 and argv[3] == "-c":
+    if len(argv) >= 4 and argv[3] == "-c":
         text_files_with_match = [f for f in text_files if switch_extension(f, ".ann") in ann_files]
         for f in text_files_with_match:
             full_name = os.path.join(input_dir_name, f)
             shutil.copy(full_name, output_dir_name)
+
+    # Compile and print stats to log
+    stat_headers = ["Total lines", "Total converted",
+                    "Skipped did not match regex", "Percent converted"]
+
+    # num_lines only counts converted lines, so the total must add the regex-skipped ones back.
+    total_lines = num_lines + num_skipped_regex
+    stat_data = [
+        total_lines, num_lines, num_skipped_regex,
+        num_lines / total_lines if total_lines else 0.0  # avoid ZeroDivisionError when nothing was read
+    ]
+
+    conversion_stats = tabulate.tabulate(headers=stat_headers, tabular_data=[stat_data])
+    logging.warning("\n" + conversion_stats)
diff --git a/medacy/tools/con_form/con_to_brat.py b/medacy/tools/converters/con_to_brat.py
similarity index 56%
rename from medacy/tools/con_form/con_to_brat.py
rename to medacy/tools/converters/con_to_brat.py
index 7460715c..1daa91ca 100644
--- a/medacy/tools/con_form/con_to_brat.py
+++ b/medacy/tools/converters/con_to_brat.py
@@ -6,16 +6,31 @@
 
 Function 'convert_con_to_brat()' can be imported independently and run on individual files.
 
-This version does not produce accurate output. Revisions are underway.
+This program can be used for conversion independently from medaCy if the Line class is copied
+and pasted into a copy of this program.
 
 :author: Steele W. Farnsworth
-:date: 18 February, 2019
+:date: 13 March, 2019
 """
 
-from sys import argv as cmd_arg, exit
-from re import split, findall, fullmatch, DOTALL
+from sys import argv, exit
+from re import split, findall, fullmatch
+from medacy.tools.converters.conversion_tools.line import Line
+import re
 import os
 import shutil
+import logging
+import tabulate
+
+
+# Regex patterns; con_pattern is a raw string so \d and \| are regex escapes, not invalid str escapes
+whitespace_pattern = "( +|\t+)+"
+con_pattern = r'c=".+?" \d+:\d+ \d+:\d+\|\|t=".+?"(|\n)'
+
+# Used for stats at the end
+num_lines = 0
+num_skipped_regex = 0
+num_skipped_value_error = 0
 
 
 def is_valid_con(item: str):
@@ -25,7 +40,6 @@ def is_valid_con(item: str):
     :return: Boolean of whether or not the line matches a con regular expression.
     """
     if not isinstance(item, str): return False
-    con_pattern = "c=\".+?\" \d+:\d+ \d+:\d+\|\|t=\".+?\"(|\n)"
     if fullmatch(con_pattern, item): return True
     else: return False
 
@@ -49,25 +63,58 @@ def switch_extension(name, ext):
     return os.path.splitext(name)[0] + ext
 
 
-def get_absolute_index(txt, txt_lns, ind):
+def get_absolute_index(txt_lns, ind, entity):
     """
-    Given one of the \d+:\d+ spans, which represent the index of a char relative to the start of the line it's on,
+    Given one of the \d+:\d+ spans, which represent the index of a word relative to the start of the line it's on,
     returns the index of that char relative to the start of the file.
-    :param txt: The text file associated with the annotation.
-    :param txt_lns: The same text file as a list broken by lines
+    :param txt_lns: The list of Line objects for that file.
     :param ind: The string in format \d+:\d+
+    :param entity: The text of the entity
     :return: The absolute index
     """
 
-    # convert ind to line_num and char_num
+    # Convert ind to line_num and char_num
     nums = split(":", ind)
     line_num = int(nums[0]) - 1  # line nums in con start at 1 and not 0
-    char_num = int(nums[1])
+    word_num = int(nums[1])
 
     this_line = txt_lns[line_num]
-    line_index = txt.index(this_line)  # get the absolute index of the entire line
-    abs_index = line_index + char_num
-    return abs_index
+    line_index = this_line.index
+
+    # Get the character index of the word that follows n runs of whitespace
+    split_by_whitespace = split(whitespace_pattern, this_line.text)
+    split_by_whitespace = [s for s in split_by_whitespace if s != '']
+    split_by_ws_no_ws = [s for s in split_by_whitespace if not s.isspace()]
+    all_whitespace = [s for s in split_by_whitespace if s.isspace()]
+
+    # Adjust word_num if first character cluster is whitespace
+    if split_by_whitespace[0].isspace():
+        line_to_target_word = split_by_ws_no_ws[:word_num - 1]
+    else:
+        line_to_target_word = split_by_ws_no_ws[:word_num]
+
+    num_non_whitespace = sum([len(w) for w in line_to_target_word])
+    num_whitespace = sum([len(w) for w in all_whitespace[:word_num]])
+
+    index_within_line = num_whitespace + num_non_whitespace
+    line_to_start_index = this_line.text[index_within_line:]
+    entity_pattern_escaped = re.escape(entity)
+    entity_pattern_spaced = re.sub(r"\\\s+", r"\s+", entity_pattern_escaped)
+
+    try:
+        # Search for entity regardless of case or composition of intermediate spaces
+        # match = re.search(entity_pattern_spaced, this_line.text, re.IGNORECASE)[0]
+        match = re.search(entity_pattern_spaced, line_to_start_index, re.IGNORECASE)[0]
+        offset = line_to_start_index.index(match)  # adjusts if entity is not the first char in its "word"
+    except (ValueError, TypeError):
+        logging.warning("""Entity not found in its expected line:
+        \t"%s"
+        \t"%s"
+        \tRevision of input data may be required; conversion for this item was skipped""" % (entity, this_line)
+        )
+        return -1
+
+    return index_within_line + line_index + offset
 
 
 def convert_con_to_brat(con_file_path, text_file_path=None):
@@ -81,6 +128,8 @@ def convert_con_to_brat(con_file_path, text_file_path=None):
     :return: A string representation of the brat file, which can then be written to file if desired.
     """
 
+    global num_lines, num_skipped_regex, num_skipped_value_error
+
     # By default, find txt file with equivalent name
     if text_file_path is None:
         text_file_path = switch_extension(con_file_path, ".txt")
@@ -89,15 +138,17 @@ def convert_con_to_brat(con_file_path, text_file_path=None):
                                     " directory")
         with open(text_file_path, 'r') as text_file:
             text = text_file.read()
-            text_lines = text.split('\n')
+            text_lines = Line.init_lines(text)
     # Else, open the file with the path passed to the function
     elif os.path.isfile(text_file_path):
         with open(text_file_path, 'r') as text_file:
             text = text_file.read()
-            text_lines = text.split('\n')
+            text_lines = Line.init_lines(text)
     else: raise FileNotFoundError("No text file path was provided or the file was not found."
                                   " Note that direct string input of the source text is not supported.")
 
+    num_lines += len(text_lines)
+
     # If con_file_path is actually a path, open it and split it into lines
     if os.path.isfile(con_file_path):
         with open(con_file_path, 'r') as con_file:
@@ -111,10 +162,17 @@ def convert_con_to_brat(con_file_path, text_file_path=None):
     output_text = ""
     t = 1
     for line in con_text_lines:
-        if not is_valid_con(line): continue
+        if line == "" or line.startswith("#"): continue
+        elif not is_valid_con(line):
+            logging.warning("Incorrectly formatted line in %s was skipped: \"%s\"." % (con_file_path, line))
+            num_skipped_regex += 1
+            continue
         d = line_to_dict(line)
-        start_ind = get_absolute_index(text, text_lines, d["start_ind"])
-        span_length = d["data_item"].__len__()
+        start_ind = get_absolute_index(text_lines, d["start_ind"], d["data_item"])
+        if start_ind == -1:
+            num_skipped_value_error += 1
+            continue  # skips data that could not be converted
+        span_length = len(d["data_item"])
         end_ind = start_ind + span_length
         output_line = "T%s\t%s %s %s\t%s\n" % (str(t), d["data_type"], str(start_ind), str(end_ind), d["data_item"])
         output_text += output_line
@@ -127,34 +185,42 @@ def convert_con_to_brat(con_file_path, text_file_path=None):
 
     # Get the input and output directories from the command line.
 
-    if not cmd_arg.__len__() >= 3:
+    if len(argv) < 3:
         # Command-line arguments must be provided for the input and output directories.
-        # Else, prints instructions and aborts the program.
-        print("Please run the program again, entering the input and output directories as command-line arguments"
-              " in that order. Optionally, enter '-c' as a final command line argument if you want to copy"
-              " the text files used in the conversion over to the output directory.")
-        exit()
+        raise IOError("Please run the program again, entering the input and output directories as command-line"
+                      " arguments in that order. Optionally, enter '-c' as a final command line argument if you want"
+                      " to copy the text files used in the conversion over to the output directory.")
 
     try:
-        input_dir_name = cmd_arg[1]
+        input_dir_name = argv[1]
         input_dir = os.listdir(input_dir_name)
     except FileNotFoundError:  # dir doesn't exist
         while not os.path.isdir(input_dir_name):
             input_dir_name = input("Input directory not found; please try another directory:")
         input_dir = os.listdir(input_dir_name)
     try:
-        output_dir_name = cmd_arg[2]
+        output_dir_name = argv[2]
         output_dir = os.listdir(output_dir_name)
     except FileNotFoundError:
         while not os.path.isdir(output_dir_name):
             output_dir_name = input("Output directory not found; please try another directory:")
             output_dir = os.listdir(output_dir_name)
 
+    # Create the log
+    log_path = os.path.join(output_dir_name, "conversion.log")
+    logging.basicConfig(filename=log_path)
+
     # Get only the text files in input_dir
     text_files = [f for f in input_dir if f.endswith(".txt")]
     # Get only the con files in input_dir that have a ".txt" equivalent
     con_files = [f for f in input_dir if f.endswith(".con") and switch_extension(f, ".txt") in text_files]
 
+    # Ensure user is aware if there are no files to convert
+    if len(con_files) < 1:
+        raise FileNotFoundError("There were no con files in the input directory with a corresponding text file. "
+                                "Please ensure that the input directory contains ann files and that each file has "
+                                "a corresponding txt file (see help for this program).")
+
     for input_file_name in con_files:
         full_file_path = os.path.join(input_dir_name, input_file_name)
         output_file_name = switch_extension(input_file_name, ".ann")
@@ -165,8 +231,23 @@ def convert_con_to_brat(con_file_path, text_file_path=None):
 
     # Paste all the text files used in the conversion process to the output directory
     # if there's a fourth command line argument and that argument is -c
-    if cmd_arg.__len__() == 4 and cmd_arg[3] == "-c":
+    if len(argv) >= 4 and argv[3] == "-c":
         text_files_with_match = [f for f in text_files if switch_extension(f, ".con") in con_files]
         for f in text_files_with_match:
             full_name = os.path.join(input_dir_name, f)
             shutil.copy(full_name, output_dir_name)
+
+    # Compile and print stats to log
+    stat_headers = ["Total lines", "Total converted", "Lines skipped", "Skipped due to value error",
+                    "Skipped did not match regex", "Percent converted"]
+    stat_data = [
+        num_lines,
+        num_lines - num_skipped_regex - num_skipped_value_error,
+        num_skipped_regex + num_skipped_value_error,
+        num_skipped_value_error,
+        num_skipped_regex,
+        (num_lines - num_skipped_regex - num_skipped_value_error) / num_lines
+    ]
+
+    conversion_stats = tabulate.tabulate(headers=stat_headers, tabular_data=[stat_data])
+    logging.warning("\n" + conversion_stats)
diff --git a/medacy/tools/converters/conversion_tools/__init__.py b/medacy/tools/converters/conversion_tools/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/medacy/tools/converters/conversion_tools/line.py b/medacy/tools/converters/conversion_tools/line.py
new file mode 100644
index 00000000..72af12de
--- /dev/null
+++ b/medacy/tools/converters/conversion_tools/line.py
@@ -0,0 +1,60 @@
+"""
+:author: Steele Farnsworth
+:date: 13 March, 2019
+"""
+
+
+class Line:
+    """
+    Represents a line of text in the text file related to an annotation file, ensuring that each line has an accurate
+    start index as one of its attributes regardless of whether that line appears more than once
+    """
+
+    def __init__(self, line_text: str, line_num: int, line_index: int):
+        self.text = line_text
+        self.num = line_num
+        self.index = line_index
+
+    @staticmethod
+    def init_lines(full_text: str):
+        """
+        Creates all the Line objects for a given text file, storing them in a list where index n holds the
+        (n + 1)th line of the document.
+        :param full_text: The entire text of the document.
+        :return: The list of Lines.
+        """
+        global_start_ind = 0
+        global_line_num = 0
+
+        full_text_lines = full_text.split('\n')
+        text_lines = []
+
+        for given_line in full_text_lines:
+
+            sub_index = 0
+            matches = []
+            while sub_index < global_start_ind:
+                for previous_line in text_lines:
+                    if given_line == previous_line.text:
+                        matches.append(previous_line)
+                    sub_index += previous_line.index
+
+            if matches:
+                # Get the text from the end of the last match onward
+                search_text_start = matches[-1].index + len(matches[-1].text)
+                search_text = full_text[search_text_start:]
+                start_ind = search_text.index(given_line) + search_text_start
+            else:  # The line is unique so str.index() will be accurate
+                start_ind = full_text.index(given_line)
+
+            new_line = Line(given_line, global_line_num, start_ind)
+            text_lines.append(new_line)
+
+            global_start_ind = text_lines[-1].index
+            global_line_num += 1
+
+        return text_lines
+
+    def __str__(self):
+        """String representation of a line, with its index and text separated by a pipe."""
+        return "%i | %s" % (self.index, self.text)