From 573294ba752823d2fb6a708acf2533a6e14752c9 Mon Sep 17 00:00:00 2001 From: Erik Osheim Date: Sun, 15 Mar 2015 01:37:15 -0400 Subject: [PATCH 1/2] Add code to disambiguate pronouns dynamically. This commit improves the display code so that the smallest unambiguous abbreviation is calculated based on the contents of pronouns.tab. --- src/pronouns/pages.clj | 12 +++++------- src/pronouns/util.clj | 29 +++++++++++++++++++++++++++++ 2 files changed, 34 insertions(+), 7 deletions(-) diff --git a/src/pronouns/pages.clj b/src/pronouns/pages.clj index d7ef9fc..eb0253c 100644 --- a/src/pronouns/pages.clj +++ b/src/pronouns/pages.clj @@ -90,16 +90,14 @@ (take 5 inputs) (u/table-lookup inputs pronouns-table)))) -;; we could choose to display the entire row for the label. -;; currently the first two entries are enough to disambiguate the -;; pronouns -- will that always be true? -(defn make-link [row] - (let [link (str "/" (s/join "/" row)) - label (str (first row) "/" (first (rest row)))] +(defn make-link [pair] + (let [link (str "/" (s/join "/" (second pair))) + label (str (s/join "/" (first pair)))] [:li [:a {:href link} label]])) (defn front [pronouns-table] - (let [links (map make-link (sort pronouns-table)) + (let [abbreviations (u/abbreviate (sort pronouns-table)) + links (map make-link abbreviations) title "Pronoun Island"] (html [:html diff --git a/src/pronouns/util.clj b/src/pronouns/util.clj index 1269664..1d4c09a 100644 --- a/src/pronouns/util.clj +++ b/src/pronouns/util.clj @@ -14,3 +14,32 @@ (defn tabfile-lookup [query-key tabfile] (table-lookup query-key (slurp-tabfile tabfile))) + +; given a row and its lexically-closest neighbors, +; determine the smallest abbreviation which is still +; distinct. +(defn disambiguate + [prev row next] + (loop [n 1] + (let [row-n (take n row)] + (cond + (>= n 5) row + (= row-n (take n prev)) (recur (+ n 1)) + (= row-n (take n next)) (recur (+ n 1)) + :else row-n)))) + +; given a list of pronoun rows, return a list of +; pairs, where the first item is the abbreviation +; and the second is the original pronoun row. +(defn abbreviate + [sorted-table] + (loop [acc nil + prev nil + row (first sorted-table) + todo (rest sorted-table)] + (let [next (first todo) + abbrev (disambiguate prev row next) + pair (list abbrev row) + acc2 (conj acc pair)] + (if (empty? todo) (reverse acc2) + (recur acc2 row next (rest todo)))))) From 2be8d2cd5e6b29b75aa93108bf738dd2d3b0bb21 Mon Sep 17 00:00:00 2001 From: Erik Osheim Date: Sun, 15 Mar 2015 03:12:03 -0400 Subject: [PATCH 2/2] Fix style issues brought up in review. --- src/pronouns/pages.clj | 8 ++++---- src/pronouns/util.clj | 12 ++++++------ 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/pronouns/pages.clj b/src/pronouns/pages.clj index eb0253c..1992328 100644 --- a/src/pronouns/pages.clj +++ b/src/pronouns/pages.clj @@ -90,14 +90,14 @@ (take 5 inputs) (u/table-lookup inputs pronouns-table)))) -(defn make-link [pair] - (let [link (str "/" (s/join "/" (second pair))) - label (str (s/join "/" (first pair)))] +(defn make-link [abbrev row] + (let [link (str "/" (s/join "/" row)) + label (s/join "/" abbrev)] [:li [:a {:href link} label]])) (defn front [pronouns-table] (let [abbreviations (u/abbreviate (sort pronouns-table)) - links (map make-link abbreviations) + links (map (fn [entry] (make-link (first entry) (second entry))) abbreviations) title "Pronoun Island"] (html [:html diff --git a/src/pronouns/util.clj b/src/pronouns/util.clj index 1d4c09a..8327568 100644 --- a/src/pronouns/util.clj +++ b/src/pronouns/util.clj @@ -15,10 +15,10 @@ [query-key tabfile] (table-lookup query-key (slurp-tabfile tabfile))) -; given a row and its lexically-closest neighbors, -; determine the smallest abbreviation which is still -; distinct. (defn disambiguate + "given a row and its lexically-closest neighbors, + determine the smallest abbreviation which is still + distinct." [prev row next] (loop [n 1] (let [row-n (take n row)] @@ -28,10 +28,10 @@ (= row-n (take n next)) (recur (+ n 1)) :else row-n)))) -; given a list of pronoun rows, return a list of -; pairs, where the first item is the abbreviation -; and the second is the original pronoun row. (defn abbreviate + "given a list of pronoun rows, return a list of + pairs, where the first item is the abbreviation + and the second is the original pronoun row." [sorted-table] (loop [acc nil prev nil