Skip to content

Commit d92134b

Browse files
Peter Johnson
authored and committed
improved shannon_letters_ngram
1 parent 42613a4 commit d92134b

File tree

3 files changed

+30
-21
lines changed

3 files changed

+30
-21
lines changed

evaluation_function/dev.json

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,22 @@
11
{
22
"basic_nn": {
3-
"answer": "1.0",
43
"response": "1.0",
4+
"answer": "1.0",
55
"model": "basic_nn",
66
"refresh": false
77
},
88
"shannon_letters_single": {
9-
"answer": "2.0",
109
"response": "3.0",
10+
"answer": "2.0",
1111
"model": "shannon_letters_single",
1212
"uniform": true,
1313
"word_count": "random"
1414
},
1515
"shannon_letters_ngram": {
16-
"answer": "2.0",
1716
"response": "3.0",
17+
"answer": 8,
1818
"model": "shannon_letters_ngram",
19-
"word_count": "random"
19+
"word_count": "random",
20+
"context_window": 10
2021
}
2122
}

evaluation_function/evaluation.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ def evaluation_function(
3434
#model_name = getattr(params, "model", "basic_nn") # default
3535
model_name = params.get("model", "basic_nn") # default
3636

37-
print(params)
37+
print(response, answer, params)
3838
try:
3939
model = getattr(models, model_name) # e.g. models.basic_nn
4040
except AttributeError:

evaluation_function/models/shannon_letters_ngram.py

Lines changed: 24 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@
1616
LETTERS_PATH = MODEL_DIR / "norvig_letter_ngrams.csv"
1717
WORD_LENGTHS_PATH = MODEL_DIR / "norvig_word_length_frequencies.csv"
1818

19+
20+
1921
# Shannon's English language generator using letter frequency
2022

2123
# Relative Frequencies of Letters in General English Plain text From Cryptographical Mathematics, by Robert Edward Lewand
@@ -54,30 +56,33 @@ def sample_ngram(lookups, n, prefix="", k=1):
5456
return random.choices(data["keys"], weights=data["freqs"], k=k)
5557

5658

57-
def generate_word(n) -> str: # n is the number of letters in the word
59+
def generate_word(N,n) -> str: # N = max letters, n = context window (as in, n-gram)
5860
lookups = NGRAM_LOOKUPS
59-
n_max=n
61+
N_max=N
6062
samples = {}
6163
samples[1] = sample_ngram(lookups, n=1, prefix="", k=1)[0]
6264
print("1-gram:", samples[1]) if printing == 1 else None
63-
for i in range(2, n+1):
64-
if len(lookups)<=i: # no i-grams available → stop
65-
samples[i] = samples[i-1]+'#'
66-
n_max=i
65+
for i in range(2, N+1):
66+
if len(lookups)<=min(n,i):
67+
samples[i] = samples[i-1]+'#' # ## no i-grams available → stop
68+
N_max=i
6769
break
68-
prefix = samples[i-1] # previous (i-1)-gram is the prefix
69-
if prefix not in lookups[i]: # missing bucket → stop
70+
prefix = samples[i-1][-n+1:] # previous (i-1)-gram, last n-1 letters (for n > 1)
71+
if prefix not in lookups[len(prefix)+1]: # $$ missing bucket → stop
7072
if i>2:
71-
samples[i] = samples[i-1]+"#"
72-
n_max=i
73+
samples[i] = samples[i-1]+"$"
74+
N_max=i
7375
else:
74-
samples[i] = "#"
76+
samples[i] = "$"
77+
N_max = 1
7578
break
7679
else:
77-
samples[i] = sample_ngram(lookups, n=i, prefix=prefix, k=1)[0]
80+
new = sample_ngram(lookups, n=min(i,n), prefix=prefix, k=1)[0]
81+
print(f"i = {i}, N = {N}, n = {n},new string = {new}") if printing == 1 else None
82+
samples[i] = samples[i-1][:-n+1]+new
7883
print(f"{i}-gram:", samples[i]) if printing == 1 else None
7984

80-
return samples[n_max]
85+
return samples[N_max]
8186

8287
def csv_to_lists(filename: str) -> list:
8388
frequencies = []
@@ -95,11 +100,14 @@ def run(response, answer, params:Params) -> Result:
95100
word_lengths["tokens"] = [row[0] for row in data]
96101
word_lengths["weights"] = [row[1] for row in data]
97102
word_count = params.get("word_count", 10)
103+
response_used = isinstance(response, int)
104+
context_window = response if response_used else params.get("context_window", 3)
98105
if word_count == "random":
99106
word_count = random.randint(3,15)
100107
for i in range(word_count):
101108
k=int(random.choices(word_lengths["tokens"],weights=word_lengths["weights"],k=1)[0])
102-
output.append(generate_word(k))
103-
output=' '.join(output)
109+
output.append(generate_word(k,context_window))
110+
feedback_items = [("general", ' '.join(output))]
111+
feedback_items.append("| Answer not an integer; used default context window") if not response_used else None
104112
is_correct = True
105-
return Result(is_correct=is_correct,feedback_items=[("general",output)])
113+
return Result(is_correct=is_correct,feedback_items=feedback_items)

0 commit comments

Comments
 (0)